Files
LockInBroMacOS/LockInBro/SessionManager.swift

590 lines
21 KiB
Swift
Raw Normal View History

2026-03-29 06:29:18 -04:00
// SessionManager.swift — Focus session state, native VLM screen analysis
// Screenshot capture → Gemini Vision API → apply results to UI + post to backend.
// No Python subprocess. No external process management.
import AppKit
import SwiftUI
import UserNotifications
import ScreenCaptureKit
@Observable
@MainActor
final class SessionManager {
static let shared = SessionManager()
// MARK: - State
/// Session currently tracked by this manager; `nil` when none is active.
var activeSession: FocusSession?
/// Task attached to the active session, if one was supplied or resolved on resume.
var activeTask: AppTask?
/// Steps of `activeTask`, kept sorted by `sortOrder` when loaded.
var activeSteps: [Step] = []
/// Index into `activeSteps` of the step currently being worked on.
var currentStepIndex: Int = 0
/// True between a successful session start/resume and the session ending.
var isSessionActive: Bool = false
/// Local timestamp taken when the session was started or resumed by this manager.
var sessionStartDate: Date?
/// Number of nudges issued since the session started or was resumed.
var distractionCount: Int = 0
/// Text of the most recent nudge, if any.
var lastNudge: String?
/// Resume card most recently fetched from the backend.
var resumeCard: ResumeCard?
/// Set to true once a resume card has been fetched.
var showingResumeCard: Bool = false
/// Localized description of the last failed session/step operation.
var errorMessage: String?
/// True while `startSession(task:)` is in flight.
var isLoading: Bool = false
2026-03-29 06:29:18 -04:00
// VLM / proactive agent
var proactiveCard: ProactiveCard?
2026-03-29 00:58:22 -04:00
var latestVlmSummary: String?
2026-03-29 06:29:18 -04:00
var latestInferredTask: String?
2026-03-29 00:58:22 -04:00
var isExecuting: Bool = false
var executorOutput: (title: String, content: String)?
2026-03-29 06:29:18 -04:00
var monitoringError: String?
// Screenshot engine
var isCapturing: Bool = false
2026-03-29 06:29:18 -04:00
@ObservationIgnored private var captureTask: Task<Void, Never>?
private let captureInterval: TimeInterval = 5.0
2026-03-29 06:29:18 -04:00
// Frame buffer — accumulate N frames before calling VLM for temporal diff context
@ObservationIgnored private var frameBuffer: [Data] = []
// Number of buffered frames required before a VLM call is made
private let framesPerVLMCall = 3
// Rolling summary history — fed as context into subsequent VLM calls
private struct HistoryEntry { let summary: String; let timestamp: Date }
@ObservationIgnored private var screenshotHistory: [HistoryEntry] = []
// App switch tracking
// Recent app activations (name, bundle id, activation time)
@ObservationIgnored private var appSwitches: [(name: String, bundleId: String, time: Date)] = []
// Token returned by the workspace notification center; nil while not observing
@ObservationIgnored private var appSwitchObserver: (any NSObjectProtocol)?
// App that was frontmost before the next switch, and when it became frontmost
@ObservationIgnored private var lastApp: (name: String, bundleId: String) = ("", "")
@ObservationIgnored private var lastAppEnteredAt: Date = Date()
2026-03-29 06:29:18 -04:00
// Proactive card auto-dismiss timer
2026-03-29 00:58:22 -04:00
@ObservationIgnored private var proactiveCardTimer: Task<Void, Never>?
private init() {}
// MARK: - Computed
/// The step at `currentStepIndex`, or `nil` when that index does not address
/// an element of `activeSteps` (empty list, index past the end, or negative).
var currentStep: Step? {
    // `indices.contains` rejects negative indices as well as too-large ones,
    // so the subscript below can never trap.
    guard activeSteps.indices.contains(currentStepIndex) else { return nil }
    return activeSteps[currentStepIndex]
}
/// Number of entries in `activeSteps` whose `isDone` flag is set.
var completedCount: Int {
    activeSteps.reduce(into: 0) { tally, step in
        if step.isDone { tally += 1 }
    }
}
/// Total number of steps currently loaded.
var totalSteps: Int {
    return activeSteps.count
}
/// Seconds elapsed since `sessionStartDate`, or 0 when no start date is set.
var sessionElapsed: TimeInterval {
    if let startedAt = sessionStartDate {
        return Date().timeIntervalSince(startedAt)
    }
    return 0
}
2026-03-29 06:29:18 -04:00
// MARK: - Monitoring Lifecycle
/// Immediately shuts down all monitoring without making any API calls.
///
/// Stops the capture loop and the app-switch observer, cancels the proactive-card
/// timer, and clears session, card, VLM and buffer state along with the persisted
/// session id.
func stopMonitoring() {
    // Background work
    stopCapture()
    stopAppObserver()
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil

    // Session state
    activeSession = nil
    activeTask = nil
    activeSteps = []
    isSessionActive = false
    sessionStartDate = nil
    persistedSessionId = nil

    // Cards and nudges
    lastNudge = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil

    // VLM / executor output
    latestVlmSummary = nil
    latestInferredTask = nil
    isExecuting = false
    executorOutput = nil
    monitoringError = nil

    // Analysis buffers
    screenshotHistory = []
    frameBuffer = []
}
/// Called once after login. Auto-resumes any existing active session and starts the capture loop.
///
/// Does nothing when no auth token is stored or the capture loop is already running.
/// When Screen Recording access has not been granted, the system prompt is requested,
/// `monitoringError` is set, and monitoring is not started.
func startMonitoring() async {
    if TokenStore.shared.token == nil || isCapturing { return }

    monitoringError = nil
    await requestNotificationPermission()

    // Silent preflight — never shows UI; only request permission if not yet granted.
    guard CGPreflightScreenCaptureAccess() else {
        CGRequestScreenCaptureAccess()
        monitoringError = "Screen Recording permission required — enable in System Settings → Privacy & Security → Screen Recording, then tap Retry"
        return
    }

    // A failed lookup is handled exactly like "no active session": start plain monitoring.
    let existing = try? await APIClient.shared.getActiveSession()
    if let existing {
        await autoResumeSession(existing)
    } else {
        startCapture()
        startAppObserver()
    }
}
/// Silently resume an active session found on the backend (no loading UI shown).
///
/// Adopts `session` as the active session, resets per-session counters and buffers,
/// tries to load the session's task and steps, then starts the capture loop and the
/// app-switch observer.
///
/// Loading the task context is best-effort: a failure is logged and the session is
/// still resumed without a task.
///
/// - Parameter session: The active session reported by the backend.
private func autoResumeSession(_ session: FocusSession) async {
    activeSession = session
    persistedSessionId = session.id
    isSessionActive = true
    // NOTE(review): elapsed time restarts from "now" on resume rather than from the
    // session's original start — confirm this is intended.
    sessionStartDate = Date()
    distractionCount = 0
    lastNudge = nil
    screenshotHistory = []
    frameBuffer = []

    if let taskId = session.taskId {
        do {
            let tasks = try await APIClient.shared.getTasks()
            activeTask = tasks.first(where: { $0.id == taskId })
            if let task = activeTask {
                let steps = try await APIClient.shared.getSteps(taskId: task.id)
                activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
                // Prefer the step flagged active, then the first pending one, else the first step.
                currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                    ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                    ?? 0
            }
        } catch {
            // Keep the resume best-effort, but leave a trace instead of failing silently.
            print("[Session] Failed to load task context for resumed session: \(error)")
        }
    }

    let shortId = String(session.id.prefix(8))
    let taskLabel = activeTask?.title ?? "(no task)"
    latestVlmSummary = "Resumed session \(shortId) · \(taskLabel)"

    startCapture()
    startAppObserver()
}
// MARK: - Session Lifecycle
/// Id of the most recent session, stored in `UserDefaults` under
/// "lockInBro.lastSessionId". Assigning `nil` removes the stored value.
private var persistedSessionId: String? {
    get {
        return UserDefaults.standard.string(forKey: "lockInBro.lastSessionId")
    }
    set {
        let defaults = UserDefaults.standard
        guard let newValue else {
            defaults.removeObject(forKey: "lockInBro.lastSessionId")
            return
        }
        defaults.set(newValue, forKey: "lockInBro.lastSessionId")
    }
}
/// Starts a new focus session, optionally bound to `task`.
///
/// Any session already known locally, persisted, or reported by the backend is ended
/// first (best-effort). On success the session state is reset, the task's steps are
/// loaded, and the capture loop is restarted. On failure `errorMessage` is set.
///
/// - Parameter task: Task to focus on, or `nil` for a task-less session.
func startSession(task: AppTask?) async {
    isLoading = true
    defer { isLoading = false }
    errorMessage = nil

    do {
        // End any existing session first
        let knownId = activeSession?.id ?? persistedSessionId
        let staleId: String?
        if let knownId {
            staleId = knownId
        } else {
            staleId = (try? await APIClient.shared.getActiveSession())?.id
        }
        if let staleId {
            _ = try? await APIClient.shared.endSession(sessionId: staleId, status: "completed")
        }

        let created = try await APIClient.shared.startSession(taskId: task?.id)
        activeSession = created
        persistedSessionId = created.id
        activeTask = task
        activeSteps = []
        currentStepIndex = 0
        isSessionActive = true
        sessionStartDate = Date()
        distractionCount = 0
        lastNudge = nil
        screenshotHistory = []
        frameBuffer = []

        if let task {
            let fetched = try await APIClient.shared.getSteps(taskId: task.id)
            activeSteps = fetched.sorted { $0.sortOrder < $1.sortOrder }
            // Prefer the step flagged active, then the first pending one, else the first step.
            let activeIndex = activeSteps.firstIndex(where: { $0.isActive })
            let pendingIndex = activeIndex ?? activeSteps.firstIndex(where: { $0.status == "pending" })
            currentStepIndex = pendingIndex ?? 0
        }

        await requestNotificationPermission()

        // Restart capture loop (in case it wasn't running or was in monitoring-only mode)
        stopCapture()
        startCapture()
        if appSwitchObserver == nil {
            startAppObserver()
        }
    } catch {
        errorMessage = error.localizedDescription
    }
}
/// Ends the active session and returns to monitoring-only mode.
///
/// Capture and app observation are stopped first. The backend is then told the session
/// ended with `status` (best-effort, only when a session exists), and all session, card
/// and buffer state is cleared. While an auth token is still stored, capture and app
/// observation are started again.
///
/// - Parameter status: Status reported to the backend; defaults to "completed".
func endSession(status: String = "completed") async {
    stopCapture()
    stopAppObserver()

    if let ending = activeSession {
        _ = try? await APIClient.shared.endSession(sessionId: ending.id, status: status)
    }

    // Session state
    activeSession = nil
    activeTask = nil
    activeSteps = []
    isSessionActive = false
    sessionStartDate = nil
    persistedSessionId = nil

    // Cards and nudges
    lastNudge = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil

    // VLM / executor output and buffers
    latestVlmSummary = nil
    latestInferredTask = nil
    isExecuting = false
    executorOutput = nil
    screenshotHistory = []
    frameBuffer = []

    // Keep the capture loop running for app-switch heuristics
    guard TokenStore.shared.token != nil else { return }
    startCapture()
    startAppObserver()
}
/// Fetches the resume card for the active session and flags it for display.
/// Does nothing without an active session; a failure is reported via `errorMessage`.
func fetchResumeCard() async {
    guard let current = activeSession else { return }
    do {
        resumeCard = try await APIClient.shared.resumeSession(sessionId: current.id).resumeCard
        showingResumeCard = true
    } catch {
        errorMessage = error.localizedDescription
    }
}
/// Marks the current step complete on the backend, stores the updated step locally,
/// and advances `currentStepIndex` to the first step still pending (if any).
/// A failure is reported via `errorMessage` and leaves local state untouched.
func completeCurrentStep() async {
    guard let step = currentStep else { return }

    let updated: Step
    do {
        updated = try await APIClient.shared.completeStep(stepId: step.id)
    } catch {
        errorMessage = error.localizedDescription
        return
    }

    if let position = activeSteps.firstIndex(where: { $0.id == updated.id }) {
        activeSteps[position] = updated
    }
    if let nextPending = activeSteps.firstIndex(where: { $0.status == "pending" }) {
        currentStepIndex = nextPending
    }
}
2026-03-29 06:29:18 -04:00
// MARK: - Retry (HUD Retry button)
/// Clears the monitoring error and buffered frames, restarts the capture loop,
/// and starts the app-switch observer if it is not already registered.
func retryMonitoring() {
    frameBuffer.removeAll()
    monitoringError = nil

    stopCapture()
    startCapture()

    guard appSwitchObserver == nil else { return }
    startAppObserver()
}
2026-03-29 00:58:22 -04:00
// MARK: - Proactive Card Lifecycle
2026-03-29 06:29:18 -04:00
/// Presents `card` (animated) and schedules its automatic dismissal after 15 seconds,
/// replacing any dismissal timer that was already pending.
private func showProactiveCard(_ card: ProactiveCard) {
    proactiveCardTimer?.cancel()
    withAnimation {
        proactiveCard = card
    }
    proactiveCardTimer = Task { @MainActor [weak self] in
        try? await Task.sleep(for: .seconds(15))
        // A cancelled timer belongs to a card that was already replaced or dismissed.
        if Task.isCancelled { return }
        self?.dismissProactiveCard()
    }
}
/// Cancels the auto-dismiss timer and hides the proactive card (animated).
func dismissProactiveCard() {
    if let timer = proactiveCardTimer {
        timer.cancel()
        proactiveCardTimer = nil
    }
    withAnimation {
        proactiveCard = nil
    }
}
/// Dismisses the proactive card and executes the action the user approved.
///
/// Only cards whose source is `.vlmFriction` carry actions; for any other source, or
/// for an `actionIndex` that does not address one of the card's actions (including a
/// negative index), the card is dismissed and nothing else happens.
///
/// The result is published through `executorOutput`; `isExecuting` is true while the
/// action runs. Without a Gemini API key, or when execution fails, the action's own
/// `details` (or a fallback message) is published instead.
///
/// - Parameter actionIndex: Index into the card's proposed actions.
func approveProactiveCard(actionIndex: Int) {
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    let card = proactiveCard
    withAnimation { proactiveCard = nil }

    // `indices.contains` also rejects negative indices, which would trap the subscript.
    guard case .vlmFriction(_, _, let actions) = card?.source,
          actions.indices.contains(actionIndex) else { return }
    let action = actions[actionIndex]

    isExecuting = true
    Task {
        let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
        let content: String
        if geminiKey.isEmpty {
            // No key: skip the screen capture entirely and just acknowledge the approval.
            content = action.details ?? "Action approved."
        } else {
            do {
                let screenshot = await captureScreen()
                let client = GeminiVLMClient(apiKey: geminiKey)
                content = try await client.executeAction(
                    label: action.label,
                    actionType: action.actionType,
                    details: action.details ?? "",
                    screenshot: screenshot
                )
            } catch {
                print("[VLM] Action execution error: \(error)")
                content = action.details ?? "Couldn't complete automatically."
            }
        }
        isExecuting = false
        executorOutput = (title: action.label, content: content)
    }
}
// MARK: - App Switch Observer
/// Starts observing frontmost-application changes.
///
/// Records the current frontmost app as the dwell-time baseline, clears the switch
/// history, and registers for `NSWorkspace.didActivateApplicationNotification`.
/// Safe to call repeatedly: an existing registration is removed first, so observers
/// never stack up and each app switch is handled once.
private func startAppObserver() {
    // Drop any previous registration before creating a new one.
    if let existing = appSwitchObserver {
        NSWorkspace.shared.notificationCenter.removeObserver(existing)
        appSwitchObserver = nil
    }

    let current = NSWorkspace.shared.frontmostApplication
    lastApp = (current?.localizedName ?? "", current?.bundleIdentifier ?? "")
    lastAppEnteredAt = Date()
    appSwitches = []

    appSwitchObserver = NSWorkspace.shared.notificationCenter.addObserver(
        forName: NSWorkspace.didActivateApplicationNotification,
        object: nil,
        queue: .main
    ) { [weak self] notification in
        guard let app = notification.userInfo?[NSWorkspace.applicationUserInfoKey] as? NSRunningApplication
        else { return }
        // Hop onto the main actor explicitly; the observer closure itself is not isolated.
        Task { @MainActor [weak self] in self?.handleAppSwitch(app: app) }
    }
}
/// Unregisters the app-switch observer (when one is registered) and clears the
/// recorded switch history.
private func stopAppObserver() {
    defer { appSwitches = [] }
    guard let token = appSwitchObserver else { return }
    NSWorkspace.shared.notificationCenter.removeObserver(token)
    appSwitchObserver = nil
}
/// Handles activation of `app`.
///
/// Ignores activations of the app that is already recorded as frontmost (compared by
/// name). Otherwise it reports the previous app's dwell time to the backend when a
/// session exists, records the switch (history is capped at 30 entries), and shows an
/// app-switch-loop card when a session is active, no card is showing, and a loop is
/// detected.
private func handleAppSwitch(app: NSRunningApplication) {
    let incomingName = app.localizedName ?? "Unknown"
    let incomingBundleId = app.bundleIdentifier ?? ""
    let switchedAt = Date()
    if incomingName == lastApp.name { return }

    // Log previous app dwell time to backend
    let previous = lastApp
    let dwellSeconds = max(1, Int(switchedAt.timeIntervalSince(lastAppEnteredAt)))
    if let session = activeSession, !previous.name.isEmpty {
        Task {
            _ = try? await APIClient.shared.appActivity(
                sessionId: session.id,
                appBundleId: previous.bundleId,
                appName: previous.name,
                durationSeconds: dwellSeconds
            )
        }
    }

    lastApp = (incomingName, incomingBundleId)
    lastAppEnteredAt = switchedAt
    appSwitches.append((name: incomingName, bundleId: incomingBundleId, time: switchedAt))
    if appSwitches.count > 30 {
        appSwitches.removeFirst()
    }

    guard isSessionActive,
          proactiveCard == nil,
          let loop = detectRepetitiveLoop() else { return }
    showProactiveCard(ProactiveCard(source: .appSwitchLoop(apps: loop.apps, switchCount: loop.count)))
}
/// Detects a back-and-forth loop between exactly two apps.
///
/// Considers switches from the last 300 seconds. A loop is reported when the six most
/// recent of them involve exactly two distinct app names and no name appears twice in
/// a row.
///
/// - Returns: The two app names (sorted) and a count fixed at 3, or `nil` when no loop
///   is found.
private func detectRepetitiveLoop() -> (apps: [String], count: Int)? {
    let windowStart = Date().addingTimeInterval(-300)
    let recentNames = appSwitches.compactMap { entry -> String? in
        entry.time > windowStart ? entry.name : nil
    }
    guard recentNames.count >= 6 else { return nil }

    let tail = Array(recentNames.suffix(6))
    let distinct = Set(tail)
    guard distinct.count == 2 else { return nil }

    // Every neighbouring pair must differ for the sequence to alternate.
    let alternates = zip(tail, tail.dropFirst()).allSatisfy { $0 != $1 }
    guard alternates else { return nil }

    return (apps: distinct.sorted(), count: 3)
}
2026-03-29 06:29:18 -04:00
// MARK: - Screenshot Capture Loop
/// Starts the capture loop unless it is already running: one capture immediately,
/// then one every `captureInterval` seconds until the task is cancelled.
private func startCapture() {
    if isCapturing { return }
    isCapturing = true

    captureTask = Task { [weak self] in
        guard let self else { return }
        // Capture immediately, then repeat on interval
        await self.captureAndAnalyze()
        while true {
            if Task.isCancelled { return }
            try? await Task.sleep(for: .seconds(self.captureInterval))
            // Cancellation may have arrived during the sleep.
            if Task.isCancelled { return }
            await self.captureAndAnalyze()
        }
    }
}
/// Cancels the capture loop and marks capturing as stopped.
private func stopCapture() {
    let running = captureTask
    captureTask = nil
    isCapturing = false
    running?.cancel()
}
2026-03-29 06:29:18 -04:00
/// Capture one frame, buffer it, and call VLM every `framesPerVLMCall` frames.
///
/// Cancellation is cooperative, so it is re-checked after every suspension point.
/// Once `stopCapture()` has cancelled the loop, an in-flight capture or analysis must
/// not write into state that was reset or now belongs to a different session.
///
/// Analysis errors are logged and otherwise ignored. The buffered frames are kept, so
/// the next tick retries with the most recent window.
private func captureAndAnalyze() async {
    guard let imageData = await captureScreen() else { return }
    // The loop may have been cancelled while the capture was in flight.
    guard !Task.isCancelled else { return }

    frameBuffer.append(imageData)
    // Keep buffer bounded — rolling window of most recent frames
    if frameBuffer.count > framesPerVLMCall { frameBuffer.removeFirst() }

    // Only call VLM once we have a full batch for temporal diff analysis
    guard frameBuffer.count >= framesPerVLMCall else { return }

    let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
    guard !geminiKey.isEmpty else {
        print("[VLM] No Gemini API key set — skipping analysis")
        return
    }

    let client = GeminiVLMClient(apiKey: geminiKey)
    let windowTitle = NSWorkspace.shared.frontmostApplication?.localizedName ?? ""
    let recentSummaries = screenshotHistory.map(\.summary)
    let frames = frameBuffer  // snapshot before async gap

    do {
        print("[VLM] Calling Gemini with \(frames.count) frames…")
        let result = try await client.analyze(
            frames: frames,
            taskTitle: activeTask?.title ?? "",
            taskGoal: activeTask?.description ?? "",
            steps: activeSteps,
            windowTitle: windowTitle,
            recentSummaries: recentSummaries
        )
        // Discard a result that arrives after the loop was stopped or restarted:
        // it was computed against task/session context that is no longer current.
        guard !Task.isCancelled else { return }
        print("[VLM] Result: on_task=\(result.onTask), friction=\(result.friction?.type ?? "none"), summary=\(result.vlmSummary ?? "")")

        // Append to rolling summary history
        if let summary = result.vlmSummary, !summary.isEmpty {
            screenshotHistory.append(HistoryEntry(summary: summary, timestamp: Date()))
            if screenshotHistory.count > 4 { screenshotHistory.removeFirst() }
        }

        // Clear frame buffer — next batch starts fresh
        frameBuffer.removeAll()
        monitoringError = nil
        applyDistractionResult(result)

        // Post result to backend (fire-and-forget)
        if let session = activeSession {
            Task {
                try? await APIClient.shared.postAnalysisResult(result, sessionId: session.id)
            }
        }
    } catch {
        // A failure caused by our own cancellation is expected; don't log it as an error.
        guard !Task.isCancelled else { return }
        print("[VLM] Analysis error: \(error)")
        // Don't surface transient errors — the next attempt will retry automatically
    }
}
2026-03-29 06:29:18 -04:00
// MARK: - Screen Capture
/// Captures the first shareable display as a 1280×720-configured screenshot and
/// returns it JPEG-encoded.
///
/// - Returns: JPEG data, or `nil` when Screen Recording access is missing, no display
///   is available, or capture/encoding fails.
private func captureScreen() async -> Data? {
    if !CGPreflightScreenCaptureAccess() { return nil }

    guard let shareable = try? await SCShareableContent.current,
          let display = shareable.displays.first else { return nil }

    let config = SCStreamConfiguration()
    config.width = 1280
    config.height = 720
    let filter = SCContentFilter(display: display, excludingWindows: [])

    guard let frame = try? await SCScreenshotManager.captureImage(
        contentFilter: filter,
        configuration: config
    ) else { return nil }

    return cgImageToJPEG(frame)
}
/// Encodes `image` as JPEG at compression factor 0.5.
///
/// The bitmap representation is built directly from the `CGImage`, avoiding an
/// intermediate `NSImage` and a TIFF encode/decode round trip on every captured frame.
///
/// - Parameter image: The captured frame.
/// - Returns: JPEG data, or `nil` when encoding fails.
private func cgImageToJPEG(_ image: CGImage) -> Data? {
    let bitmap = NSBitmapImageRep(cgImage: image)
    return bitmap.representation(using: .jpeg, properties: [.compressionFactor: 0.5])
}
2026-03-29 06:29:18 -04:00
// MARK: - Apply VLM Result
/// Applies one VLM analysis result to the UI state.
///
/// Updates the latest summary and inferred task, marks reported steps as done, stores
/// the checkpoint note on (and moves the cursor to) the reported current step, then
/// raises at most one notification. A friction that is actionable or proposes actions
/// takes priority (resume card or proactive card). Otherwise a nudge is sent when the
/// user is off task with confidence above 0.7 and a nudge text is present.
private func applyDistractionResult(_ result: DistractionAnalysisResponse) {
    if let summary = result.vlmSummary {
        latestVlmSummary = summary
    }
    if let inferred = result.inferredTask, !inferred.isEmpty {
        latestInferredTask = inferred
    }

    // Apply step side-effects
    for doneId in result.stepsCompleted {
        guard let position = activeSteps.firstIndex(where: { $0.id == doneId }) else { continue }
        activeSteps[position].status = "done"
    }
    if let stepId = result.currentStepId,
       let position = activeSteps.firstIndex(where: { $0.id == stepId }) {
        if let note = result.checkpointNoteUpdate {
            activeSteps[position].checkpointNote = note
        }
        currentStepIndex = position
    }

    // Notification priority: friction card (formal or has actions) → nudge
    if let friction = result.friction,
       friction.isActionable || !friction.proposedActions.isEmpty {
        if friction.isResumption {
            Task { await fetchResumeCard() }
        } else if proactiveCard == nil {
            let source = ProactiveCard(source: .vlmFriction(
                frictionType: friction.type,
                description: friction.description,
                actions: friction.proposedActions
            ))
            showProactiveCard(source)
        }
        return
    }

    guard !result.onTask,
          result.confidence > 0.7,
          let nudge = result.gentleNudge else { return }
    distractionCount += 1
    lastNudge = nudge
    sendNudgeNotification(nudge)
}
// MARK: - Notifications
/// Posts a user notification carrying `nudge` as its body, with the default sound
/// and no trigger.
private func sendNudgeNotification(_ nudge: String) {
    let payload = UNMutableNotificationContent()
    payload.sound = .default
    payload.body = nudge
    payload.title = "Hey, quick check-in!"

    let request = UNNotificationRequest(identifier: UUID().uuidString, content: payload, trigger: nil)
    UNUserNotificationCenter.current().add(request)
}
/// Requests alert and sound notification authorization; the outcome and any error
/// are ignored.
private func requestNotificationPermission() async {
    let center = UNUserNotificationCenter.current()
    _ = try? await center.requestAuthorization(options: [.alert, .sound])
}
}