// SessionManager.swift — Focus session state, native VLM screen analysis
// Screenshot capture → Gemini Vision API → apply results to UI + post to backend.
// No Python subprocess. No external process management.

import AppKit
|
|
import SwiftUI
|
|
import UserNotifications
|
|
import ScreenCaptureKit
|
|
|
|
@Observable
|
|
@MainActor
|
|
final class SessionManager {
|
|
/// The single shared instance. `init` is private, so this is the only one.
static let shared = SessionManager()

// MARK: - State

// Session, task, and step state.
var activeSession: FocusSession?
var activeTask: AppTask?
var activeSteps: [Step] = []
var currentStepIndex: Int = 0
var isSessionActive: Bool = false
var sessionStartDate: Date?
var distractionCount: Int = 0
var lastNudge: String?
var resumeCard: ResumeCard?
var showingResumeCard: Bool = false
var errorMessage: String?
var isLoading: Bool = false

// VLM / proactive agent output.
var proactiveCard: ProactiveCard?
var latestVlmSummary: String?
var latestInferredTask: String?
var isExecuting: Bool = false
var executorOutput: (title: String, content: String)?
var monitoringError: String?

// Screenshot engine.
/// `true` while the capture loop task is running.
var isCapturing: Bool = false

/// Handle of the running capture loop; cancelled by `stopCapture()`.
@ObservationIgnored private var captureTask: Task<Void, Never>?
/// Seconds the capture loop sleeps between two captures.
private let captureInterval: TimeInterval = 5.0

/// Most recent JPEG frames. The VLM is only called once `framesPerVLMCall` frames are buffered.
@ObservationIgnored private var frameBuffer: [Data] = []
private let framesPerVLMCall = 3

/// One past VLM summary, passed as context into later VLM calls.
private struct HistoryEntry {
    let summary: String
    let timestamp: Date
}
@ObservationIgnored private var screenshotHistory: [HistoryEntry] = []

// App-switch tracking.
/// Recent app activations, appended by `handleAppSwitch(app:)`.
@ObservationIgnored private var appSwitches: [(name: String, bundleId: String, time: Date)] = []
/// Token returned by the workspace notification center; `nil` when not observing.
@ObservationIgnored private var appSwitchObserver: (any NSObjectProtocol)?
/// The app that was frontmost before the latest switch, and when it became frontmost.
@ObservationIgnored private var lastApp: (name: String, bundleId: String) = ("", "")
@ObservationIgnored private var lastAppEnteredAt: Date = Date()

/// Pending auto-dismiss timer for the proactive card currently on screen.
@ObservationIgnored private var proactiveCardTimer: Task<Void, Never>?

private init() {}
|
|
|
|
// MARK: - Computed
|
|
|
|
/// The step at `currentStepIndex`, or `nil` when that index does not address an element of `activeSteps`.
var currentStep: Step? {
    // `indices.contains` rejects negative indices as well as indices past the end,
    // so an out-of-range index yields `nil` instead of trapping in the subscript.
    guard activeSteps.indices.contains(currentStepIndex) else { return nil }
    return activeSteps[currentStepIndex]
}
|
|
|
|
/// Number of entries in `activeSteps` whose `isDone` is true.
var completedCount: Int {
    activeSteps.reduce(into: 0) { tally, step in
        if step.isDone { tally += 1 }
    }
}

/// Number of entries in `activeSteps`.
var totalSteps: Int {
    return activeSteps.count
}
|
|
|
|
/// Seconds elapsed since `sessionStartDate`, or `0` when no start date is set.
var sessionElapsed: TimeInterval {
    if let startedAt = sessionStartDate {
        return Date().timeIntervalSince(startedAt)
    }
    return 0
}
|
|
|
|
// MARK: - Monitoring Lifecycle
|
|
|
|
/// Immediately shuts down all monitoring without making any API calls.
///
/// Stops the capture loop and the app-switch observer, cancels the proactive-card timer, and
/// clears every piece of session-scoped state, including the persisted session id.
func stopMonitoring() {
    stopCapture()
    stopAppObserver()
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil

    // Session / task / step state.
    activeSession = nil
    activeTask = nil
    activeSteps = []
    // Also reset the counters so values from the previous run do not carry over into the
    // next monitoring run.
    currentStepIndex = 0
    distractionCount = 0
    isSessionActive = false
    sessionStartDate = nil

    // Cards and VLM output.
    lastNudge = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    latestVlmSummary = nil
    latestInferredTask = nil
    isExecuting = false
    executorOutput = nil
    monitoringError = nil

    // Capture buffers and persisted id.
    screenshotHistory = []
    frameBuffer = []
    persistedSessionId = nil
}
|
|
|
|
/// Called once after login. Resumes a session the backend reports as active; otherwise starts
/// the capture loop and app-switch observer without a session.
///
/// Returns without doing anything when no token is stored or the capture loop is already
/// running. When Screen Recording access is missing it requests access, sets `monitoringError`,
/// and returns.
func startMonitoring() async {
    if TokenStore.shared.token == nil || isCapturing { return }

    monitoringError = nil
    await requestNotificationPermission()

    // Check silently first; only issue the access request when access is not yet granted.
    guard CGPreflightScreenCaptureAccess() else {
        CGRequestScreenCaptureAccess()
        monitoringError = "Screen Recording permission required — enable in System Settings → Privacy & Security → Screen Recording, then tap Retry"
        return
    }

    // A failed lookup is handled exactly like "no active session": start plain monitoring.
    let existingSession = try? await APIClient.shared.getActiveSession()
    if let existingSession {
        await autoResumeSession(existingSession)
    } else {
        startCapture()
        startAppObserver()
    }
}
|
|
|
|
/// Silently adopts an active session found on the backend (no loading UI is shown).
///
/// Resets per-session counters and buffers, tries to load the session's task and steps, then
/// starts the capture loop and app-switch observer. Errors while loading the task or steps are
/// ignored; the session is resumed either way.
///
/// - Parameter session: The session reported as active by the backend.
private func autoResumeSession(_ session: FocusSession) async {
    activeSession = session
    persistedSessionId = session.id
    isSessionActive = true
    sessionStartDate = Date()
    distractionCount = 0
    lastNudge = nil
    screenshotHistory = []
    frameBuffer = []

    if let taskId = session.taskId {
        do {
            let allTasks = try await APIClient.shared.getTasks()
            let matchedTask = allTasks.first { $0.id == taskId }
            activeTask = matchedTask
            if let matchedTask {
                let fetchedSteps = try await APIClient.shared.getSteps(taskId: matchedTask.id)
                activeSteps = fetchedSteps.sorted { lhs, rhs in lhs.sortOrder < rhs.sortOrder }
                // Prefer the step flagged active, then the first pending one, else the first step.
                if let activeIndex = activeSteps.firstIndex(where: { $0.isActive }) {
                    currentStepIndex = activeIndex
                } else if let pendingIndex = activeSteps.firstIndex(where: { $0.status == "pending" }) {
                    currentStepIndex = pendingIndex
                } else {
                    currentStepIndex = 0
                }
            }
        } catch {}
    }

    // Placeholder summary shown until a VLM result replaces it.
    let taskLabel = activeTask?.title ?? "(no task)"
    let shortId = String(session.id.prefix(8))
    latestVlmSummary = "Resumed session \(shortId) · \(taskLabel)"

    startCapture()
    startAppObserver()
}
|
|
|
|
// MARK: - Session Lifecycle
|
|
|
|
/// Id of the last started or resumed session, stored in `UserDefaults.standard` under
/// `"lockInBro.lastSessionId"`. Assigning `nil` removes the stored value.
private var persistedSessionId: String? {
    get {
        return UserDefaults.standard.string(forKey: "lockInBro.lastSessionId")
    }
    set {
        let defaults = UserDefaults.standard
        guard let sessionId = newValue else {
            defaults.removeObject(forKey: "lockInBro.lastSessionId")
            return
        }
        defaults.set(sessionId, forKey: "lockInBro.lastSessionId")
    }
}
|
|
|
|
/// Starts a new focus session, optionally bound to `task`.
///
/// Any session that is still open (held in memory, persisted locally, or reported by the
/// backend) is first ended with status `"completed"` on a best-effort basis. If creating the new
/// session fails, the error is published through `errorMessage` and nothing else changes.
/// Once the session exists, a failure to load the task's steps is also published through
/// `errorMessage`, but monitoring still starts so the session is not left half-started.
///
/// - Parameter task: The task to focus on, or `nil` for a session without a task.
func startSession(task: AppTask?) async {
    isLoading = true
    errorMessage = nil
    // Clear the loading flag on every exit path.
    defer { isLoading = false }

    // End any existing session first.
    var staleId: String? = activeSession?.id ?? persistedSessionId
    if staleId == nil {
        staleId = (try? await APIClient.shared.getActiveSession())?.id
    }
    if let staleId {
        _ = try? await APIClient.shared.endSession(sessionId: staleId, status: "completed")
    }

    let session: FocusSession
    do {
        session = try await APIClient.shared.startSession(taskId: task?.id)
    } catch {
        errorMessage = error.localizedDescription
        return
    }

    activeSession = session
    persistedSessionId = session.id
    activeTask = task
    activeSteps = []
    currentStepIndex = 0
    isSessionActive = true
    sessionStartDate = Date()
    distractionCount = 0
    lastNudge = nil
    screenshotHistory = []
    frameBuffer = []

    if let task {
        do {
            let steps = try await APIClient.shared.getSteps(taskId: task.id)
            activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
            // Prefer the step flagged active, then the first pending one, else the first step.
            currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                ?? 0
        } catch {
            // The session already exists at this point. Report the problem but fall through
            // so the capture loop and app observer are still started for it.
            errorMessage = error.localizedDescription
        }
    }

    await requestNotificationPermission()
    // Restart the capture loop (in case it wasn't running or was in monitoring-only mode).
    stopCapture()
    startCapture()
    if appSwitchObserver == nil { startAppObserver() }
}
|
|
|
|
/// Ends the active session on the backend (if there is one), clears all session-scoped state,
/// and restarts the capture loop and app-switch observer when a token is still stored.
///
/// - Parameter status: Status string sent to the backend; defaults to `"completed"`.
func endSession(status: String = "completed") async {
    stopCapture()
    stopAppObserver()

    // Best effort: a failed backend call does not stop the local teardown below.
    if let endingId = activeSession?.id {
        _ = try? await APIClient.shared.endSession(sessionId: endingId, status: status)
    }

    // Pending card timer.
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil

    // Session / task / step state.
    activeSession = nil
    persistedSessionId = nil
    activeTask = nil
    activeSteps = []
    isSessionActive = false
    sessionStartDate = nil

    // Cards and VLM output.
    lastNudge = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    latestVlmSummary = nil
    latestInferredTask = nil
    isExecuting = false
    executorOutput = nil

    // Capture buffers.
    screenshotHistory = []
    frameBuffer = []

    // Keep monitoring running between sessions for as long as a token is stored.
    guard TokenStore.shared.token != nil else { return }
    startCapture()
    startAppObserver()
}
|
|
|
|
/// Requests a resume card for the active session and presents it.
///
/// Does nothing when there is no active session. Request failures are published through
/// `errorMessage`.
func fetchResumeCard() async {
    guard let session = activeSession else { return }
    do {
        let response = try await APIClient.shared.resumeSession(sessionId: session.id)
        // The session may have ended or been replaced while the request was in flight.
        // Drop the response instead of presenting a card for a session that is no longer active.
        guard activeSession?.id == session.id else { return }
        resumeCard = response.resumeCard
        showingResumeCard = true
    } catch {
        errorMessage = error.localizedDescription
    }
}
|
|
|
|
/// Marks the current step complete on the backend, stores the returned step in `activeSteps`,
/// and moves `currentStepIndex` to the first step whose status is `"pending"`, if any.
///
/// Does nothing when there is no current step. Request failures are published through
/// `errorMessage`.
func completeCurrentStep() async {
    guard let finishing = currentStep else { return }

    let refreshed: Step
    do {
        refreshed = try await APIClient.shared.completeStep(stepId: finishing.id)
    } catch {
        errorMessage = error.localizedDescription
        return
    }

    if let slot = activeSteps.firstIndex(where: { $0.id == refreshed.id }) {
        activeSteps[slot] = refreshed
    }
    // When no pending step is left, the index stays where it is.
    if let upcoming = activeSteps.firstIndex(where: { $0.status == "pending" }) {
        currentStepIndex = upcoming
    }
}
|
|
|
|
// MARK: - Retry (HUD Retry button)
|
|
|
|
/// Restarts monitoring after an error (wired to the HUD Retry button).
///
/// Clears `monitoringError` and the frame buffer and restarts the capture loop. If Screen
/// Recording access is still missing, the permission error is set again and nothing is started.
func retryMonitoring() {
    monitoringError = nil
    frameBuffer = []
    stopCapture()

    // The permission message tells the user to tap Retry after granting access. Without access
    // every capture returns nil without reporting anything, so clearing the error here would
    // make Retry look successful while nothing is analysed. Surface the error again instead.
    guard CGPreflightScreenCaptureAccess() else {
        monitoringError = "Screen Recording permission required — enable in System Settings → Privacy & Security → Screen Recording, then tap Retry"
        return
    }

    startCapture()
    if appSwitchObserver == nil { startAppObserver() }
}
|
|
|
|
// MARK: - Proactive Card Lifecycle
|
|
|
|
/// Presents `card` and schedules its automatic dismissal after 15 seconds.
///
/// Any timer belonging to a previously shown card is cancelled first.
///
/// - Parameter card: The card to present.
private func showProactiveCard(_ card: ProactiveCard) {
    proactiveCardTimer?.cancel()
    withAnimation { proactiveCard = card }

    proactiveCardTimer = Task { @MainActor [weak self] in
        try? await Task.sleep(for: .seconds(15))
        // The cancellation check and the dismissal run back to back on the main actor, with no
        // suspension point in between, so a timer cancelled in favour of a newer card can
        // never dismiss that newer card.
        guard !Task.isCancelled else { return }
        self?.dismissProactiveCard()
    }
}
|
|
|
|
/// Removes the proactive card (animated) and cancels its auto-dismiss timer.
func dismissProactiveCard() {
    if let pendingTimer = proactiveCardTimer {
        pendingTimer.cancel()
        proactiveCardTimer = nil
    }
    withAnimation {
        proactiveCard = nil
    }
}
|
|
|
|
/// Dismisses the proactive card and executes the chosen action.
///
/// Only cards whose source is `.vlmFriction` carry actions; for any other card, or for an
/// `actionIndex` outside the action list, the card is dismissed and nothing else happens.
/// The outcome is published through `executorOutput`, and `isExecuting` is true while the
/// work is in flight.
///
/// - Parameter actionIndex: Index into the card's proposed actions.
func approveProactiveCard(actionIndex: Int) {
    proactiveCardTimer?.cancel()
    proactiveCardTimer = nil
    let card = proactiveCard
    withAnimation { proactiveCard = nil }

    // `indices.contains` also rejects negative indices, which would otherwise trap below.
    guard case .vlmFriction(_, _, let actions) = card?.source,
          actions.indices.contains(actionIndex) else { return }
    let action = actions[actionIndex]

    isExecuting = true
    Task {
        // Clear the busy flag on every exit path of the task.
        defer { isExecuting = false }

        // Check the key before capturing: without a key the screenshot would never be used.
        let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
        guard !geminiKey.isEmpty else {
            executorOutput = (title: action.label, content: action.details ?? "Action approved.")
            return
        }

        do {
            let screenshot = await captureScreen()
            let client = GeminiVLMClient(apiKey: geminiKey)
            let result = try await client.executeAction(
                label: action.label,
                actionType: action.actionType,
                details: action.details ?? "",
                screenshot: screenshot
            )
            executorOutput = (title: action.label, content: result)
        } catch {
            // Fall back to the action's own details when execution fails.
            executorOutput = (title: action.label, content: action.details ?? "Couldn't complete automatically.")
        }
    }
}
|
|
|
|
// MARK: - App Switch Observer
|
|
|
|
/// Starts observing app activations and resets the app-switch tracking state.
///
/// Safe to call while an observer is already registered: the old one is removed first.
private func startAppObserver() {
    // Overwriting the stored token without removing the old observer would leave it
    // registered for good, and every activation would then be handled more than once.
    if let existing = appSwitchObserver {
        NSWorkspace.shared.notificationCenter.removeObserver(existing)
        appSwitchObserver = nil
    }

    // Seed tracking with whichever app is frontmost right now.
    let current = NSWorkspace.shared.frontmostApplication
    lastApp = (current?.localizedName ?? "", current?.bundleIdentifier ?? "")
    lastAppEnteredAt = Date()
    appSwitches = []

    appSwitchObserver = NSWorkspace.shared.notificationCenter.addObserver(
        forName: NSWorkspace.didActivateApplicationNotification,
        object: nil,
        queue: .main
    ) { [weak self] notification in
        guard let app = notification.userInfo?[NSWorkspace.applicationUserInfoKey] as? NSRunningApplication
        else { return }
        // Hop onto the main actor explicitly; the notification block itself is not isolated.
        Task { @MainActor [weak self] in self?.handleAppSwitch(app: app) }
    }
}
|
|
|
|
/// Unregisters the app-activation observer (if one is registered) and clears the switch log.
private func stopAppObserver() {
    appSwitches = []
    guard let token = appSwitchObserver else { return }
    NSWorkspace.shared.notificationCenter.removeObserver(token)
    appSwitchObserver = nil
}
|
|
|
|
/// Handles an app activation: reports the previous app's dwell time to the backend, records the
/// switch, and shows a proactive card when a repetitive back-and-forth loop is detected.
///
/// Activations whose app name equals the last recorded one are ignored.
///
/// - Parameter app: The application that just became active.
private func handleAppSwitch(app: NSRunningApplication) {
    let switchedAt = Date()
    let appName = app.localizedName ?? "Unknown"
    let appBundleId = app.bundleIdentifier ?? ""

    if appName == lastApp.name { return }

    // Report how long the previous app was frontmost (at least 1 second), fire-and-forget.
    let previous = lastApp
    let dwellSeconds = max(1, Int(switchedAt.timeIntervalSince(lastAppEnteredAt)))
    if !previous.name.isEmpty, let session = activeSession {
        Task {
            _ = try? await APIClient.shared.appActivity(
                sessionId: session.id,
                appBundleId: previous.bundleId,
                appName: previous.name,
                durationSeconds: dwellSeconds
            )
        }
    }

    lastApp = (appName, appBundleId)
    lastAppEnteredAt = switchedAt

    // Keep the switch log bounded to the 30 most recent entries.
    appSwitches.append((name: appName, bundleId: appBundleId, time: switchedAt))
    if appSwitches.count > 30 { appSwitches.removeFirst() }

    // Loop detection only runs during a session and while no card is on screen.
    if isSessionActive, proactiveCard == nil, let loop = detectRepetitiveLoop() {
        showProactiveCard(ProactiveCard(source: .appSwitchLoop(apps: loop.apps, switchCount: loop.count)))
    }
}
|
|
|
|
/// Checks whether the six most recent app switches of the last 300 seconds alternate strictly
/// between exactly two apps.
///
/// - Returns: The two app names (sorted) with a fixed count of `3`, or `nil` when there are
///   fewer than six recent switches or they do not form such a pattern.
private func detectRepetitiveLoop() -> (apps: [String], count: Int)? {
    let windowStart = Date().addingTimeInterval(-300)
    let recentNames = appSwitches.compactMap { entry in
        entry.time > windowStart ? entry.name : nil
    }
    guard recentNames.count >= 6 else { return nil }

    let tail = Array(recentNames.suffix(6))
    let distinctApps = Set(tail)
    guard distinctApps.count == 2 else { return nil }

    // Strict alternation: no two neighbouring entries may name the same app.
    let hasAdjacentRepeat = zip(tail, tail.dropFirst()).contains { pair in pair.0 == pair.1 }
    if hasAdjacentRepeat { return nil }

    return (apps: distinctApps.sorted(), count: 3)
}
|
|
|
|
// MARK: - Screenshot Capture Loop
|
|
|
|
/// Starts the capture loop unless it is already running.
///
/// The loop analyses one frame right away, then sleeps `captureInterval` seconds before each
/// further frame until the task is cancelled.
private func startCapture() {
    if isCapturing { return }
    isCapturing = true

    captureTask = Task { [weak self] in
        guard let manager = self else { return }
        // First capture happens immediately; later ones follow the interval.
        await manager.captureAndAnalyze()
        while !Task.isCancelled {
            try? await Task.sleep(for: .seconds(manager.captureInterval))
            if Task.isCancelled { break }
            await manager.captureAndAnalyze()
        }
    }
}
|
|
|
|
/// Cancels the capture loop task and marks capturing as stopped.
private func stopCapture() {
    isCapturing = false
    captureTask?.cancel()
    captureTask = nil
}
|
|
|
|
/// Captures one frame, buffers it, and calls the VLM once `framesPerVLMCall` frames are buffered.
///
/// A successful result is added to the summary history, applied to the UI state, and posted to
/// the backend when a session is active. Analysis errors are only logged. If the surrounding
/// capture task has been cancelled while a capture or VLM call was in flight, the outcome is
/// discarded so it cannot touch state that belongs to a newer capture loop or session.
private func captureAndAnalyze() async {
    guard let imageData = await captureScreen(), !Task.isCancelled else { return }

    frameBuffer.append(imageData)
    // Keep the buffer bounded — rolling window of the most recent frames.
    if frameBuffer.count > framesPerVLMCall { frameBuffer.removeFirst() }

    // Only call the VLM once a full batch is available.
    guard frameBuffer.count >= framesPerVLMCall else { return }

    let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
    guard !geminiKey.isEmpty else {
        print("[VLM] No Gemini API key set — skipping analysis")
        return
    }

    let client = GeminiVLMClient(apiKey: geminiKey)
    // Note: this is the frontmost application's name, passed in the `windowTitle` slot.
    let windowTitle = NSWorkspace.shared.frontmostApplication?.localizedName ?? ""
    let recentSummaries = screenshotHistory.map(\.summary)
    let frames = frameBuffer // snapshot before the async gap

    do {
        print("[VLM] Calling Gemini with \(frames.count) frames…")
        let result = try await client.analyze(
            frames: frames,
            taskTitle: activeTask?.title ?? "",
            taskGoal: activeTask?.description ?? "",
            steps: activeSteps,
            windowTitle: windowTitle,
            recentSummaries: recentSummaries
        )

        // stopCapture() may have run while the request was in flight (session start, end, or
        // retry). Applying the result now would clear the new loop's frame buffer and attribute
        // this analysis to whichever session is active by now.
        guard !Task.isCancelled else { return }

        print("[VLM] Result: on_task=\(result.onTask), friction=\(result.friction?.type ?? "none"), summary=\(result.vlmSummary ?? "")")

        // Append to the rolling summary history (at most 4 entries are kept).
        if let summary = result.vlmSummary, !summary.isEmpty {
            screenshotHistory.append(HistoryEntry(summary: summary, timestamp: Date()))
            if screenshotHistory.count > 4 { screenshotHistory.removeFirst() }
        }

        // Clear the frame buffer — the next batch starts fresh.
        frameBuffer.removeAll()

        monitoringError = nil
        applyDistractionResult(result)

        // Post the result to the backend (fire-and-forget).
        if let session = activeSession {
            Task {
                try? await APIClient.shared.postAnalysisResult(result, sessionId: session.id)
            }
        }
    } catch {
        print("[VLM] Analysis error: \(error)")
        // Don't surface transient errors. The buffer is left full, so the next captured frame
        // triggers another attempt.
    }
}
|
|
|
|
// MARK: - Screen Capture
|
|
|
|
/// Captures the first display reported by ScreenCaptureKit as a 1280×720 JPEG.
///
/// - Returns: JPEG data, or `nil` when screen-capture access is missing, no display is
///   available, or any capture or encoding step fails.
private func captureScreen() async -> Data? {
    if !CGPreflightScreenCaptureAccess() { return nil }

    guard let shareable = try? await SCShareableContent.current,
          let display = shareable.displays.first
    else { return nil }

    let configuration = SCStreamConfiguration()
    configuration.width = 1280
    configuration.height = 720
    let wholeDisplay = SCContentFilter(display: display, excludingWindows: [])

    guard let frame = try? await SCScreenshotManager.captureImage(
        contentFilter: wholeDisplay, configuration: configuration)
    else { return nil }

    return cgImageToJPEG(frame)
}
|
|
|
|
/// Encodes `image` as JPEG with a compression factor of 0.5.
///
/// The bitmap representation is built directly from the `CGImage`, which avoids wrapping it in
/// an `NSImage` and encoding and decoding an intermediate TIFF for every frame.
///
/// - Parameter image: The image to encode.
/// - Returns: JPEG data, or `nil` when encoding fails.
private func cgImageToJPEG(_ image: CGImage) -> Data? {
    let bitmap = NSBitmapImageRep(cgImage: image)
    return bitmap.representation(using: .jpeg, properties: [.compressionFactor: 0.5])
}
|
|
|
|
// MARK: - Apply VLM Result
|
|
|
|
/// Applies one VLM analysis result to the observable state.
///
/// Updates the latest summary and inferred task, marks reported steps as done, stores a
/// checkpoint note on the reported current step and selects that step, then either handles the
/// friction (resume card or proactive card) or sends a gentle nudge.
///
/// - Parameter result: The analysis result to apply.
private func applyDistractionResult(_ result: DistractionAnalysisResponse) {
    if let summary = result.vlmSummary { latestVlmSummary = summary }
    if let inferred = result.inferredTask, !inferred.isEmpty { latestInferredTask = inferred }

    // Step side effects: mark every reported step that is present in `activeSteps` as done.
    result.stepsCompleted
        .compactMap { completedId in activeSteps.firstIndex(where: { $0.id == completedId }) }
        .forEach { activeSteps[$0].status = "done" }

    // The reported current step receives the checkpoint note (if any) and becomes selected.
    if let stepId = result.currentStepId,
       let stepIndex = activeSteps.firstIndex(where: { $0.id == stepId }) {
        if let note = result.checkpointNoteUpdate {
            activeSteps[stepIndex].checkpointNote = note
        }
        currentStepIndex = stepIndex
    }

    // Priority: a friction that is actionable or proposes actions wins; otherwise fall back to
    // the nudge when the result is confidently off-task.
    let friction = result.friction
    let frictionWins = friction.map { $0.isActionable || !$0.proposedActions.isEmpty } ?? false

    if let friction, frictionWins {
        if friction.isResumption {
            Task { await fetchResumeCard() }
        } else if proactiveCard == nil {
            let source = ProactiveCard.Source.vlmFriction(
                frictionType: friction.type,
                description: friction.description,
                actions: friction.proposedActions
            )
            showProactiveCard(ProactiveCard(source: source))
        }
    } else if !result.onTask, result.confidence > 0.7, let nudge = result.gentleNudge {
        distractionCount += 1
        lastNudge = nudge
        sendNudgeNotification(nudge)
    }
}
|
|
|
|
// MARK: - Notifications
|
|
|
|
/// Posts a local user notification carrying `nudge` as its body.
///
/// - Parameter nudge: The text shown in the notification body.
private func sendNudgeNotification(_ nudge: String) {
    let payload = UNMutableNotificationContent()
    payload.sound = .default
    payload.body = nudge
    payload.title = "Hey, quick check-in!"

    // Each request gets a fresh identifier and no trigger.
    let request = UNNotificationRequest(identifier: UUID().uuidString, content: payload, trigger: nil)
    UNUserNotificationCenter.current().add(request)
}
|
|
|
|
/// Asks for permission to show alerts and play sounds.
///
/// Both the granted/denied answer and any error are deliberately ignored.
private func requestNotificationPermission() async {
    // Discard the result explicitly, matching the `_ = try?` form used elsewhere in this file.
    _ = try? await UNUserNotificationCenter.current()
        .requestAuthorization(options: [.alert, .sound])
}
|
|
}
|