Files
LockInBroMacOS/LockInBro/SessionManager.swift

571 lines
22 KiB
Swift
Raw Normal View History

// SessionManager.swift — Focus session state, screenshot engine, distraction detection
import AppKit
import SwiftUI
import UserNotifications
import ScreenCaptureKit
@Observable
@MainActor
final class SessionManager {
/// Shared instance. The initializer is private, so this is the only instance created in this file.
static let shared = SessionManager()
// MARK: - State
/// Session returned by the backend on start; cleared when the session ends.
var activeSession: FocusSession?
/// Task the session was started for (`nil` for a task-less session).
var activeTask: AppTask?
/// Steps of `activeTask`, sorted by `sortOrder` when loaded at session start.
var activeSteps: [Step] = []
/// Index into `activeSteps` of the step currently being worked on.
var currentStepIndex: Int = 0
/// `true` between a successful `startSession` and the end of `endSession`.
var isSessionActive: Bool = false
/// Local timestamp taken when the session was started; basis for `sessionElapsed`.
var sessionStartDate: Date?
/// Number of gentle nudges issued during the current session (reset at session start).
var distractionCount: Int = 0
/// Text of the most recent nudge, if any.
var lastNudge: String?
/// Resume card returned by the backend's resume endpoint.
var resumeCard: ResumeCard?
/// Set to `true` once a resume card has been fetched; reset when the session ends.
var showingResumeCard: Bool = false
/// `localizedDescription` of the last error raised by a session or step operation.
var errorMessage: String?
/// `true` while `startSession` is running.
var isLoading: Bool = false
// Proactive agent
/// Card currently offered to the user; a new card is only created while this is `nil`.
var proactiveCard: ProactiveCard?
/// Set when the user approves a proposed action shown as a confirmation toast
var approvedActionLabel: String?
// Screenshot engine
/// `true` while either the argus subprocess or the fallback capture loop is running.
var isCapturing: Bool = false
/// Live pipeline status shown in FocusSessionView (updated at each stage)
var argusStatus: String = ""
/// Handle of the fallback capture loop so it can be cancelled.
private var captureTask: Task<Void, Never>?
/// Seconds the fallback capture loop sleeps between screenshots.
private let captureInterval: TimeInterval = 5.0
// Rolling screenshot history buffer (max 4 entries, ~20-second window at the 5-second capture interval).
// Provides temporal context to the VLM so it can detect patterns across captures.
/// One remembered analysis result: only the text summary is kept, never the image.
private struct ScreenshotHistoryEntry {
let summary: String // vlm_summary text from the previous analysis
let timestamp: Date
}
/// Most recent summaries, oldest first; excluded from observation tracking.
@ObservationIgnored private var screenshotHistory: [ScreenshotHistoryEntry] = []
// App switch tracking
/// Recent app activations, oldest first; trimmed to the latest 30 entries by `handleAppSwitch`.
@ObservationIgnored private var appSwitches: [(name: String, bundleId: String, time: Date)] = []
/// Token returned by the workspace notification center; needed to unregister the observer.
@ObservationIgnored private var appSwitchObserver: (any NSObjectProtocol)?
/// The app that was frontmost before the next switch, used to report its dwell time.
@ObservationIgnored private var lastApp: (name: String, bundleId: String) = ("", "")
/// When `lastApp` became frontmost.
@ObservationIgnored private var lastAppEnteredAt: Date = Date()
// Argus subprocess (device-side VLM)
/// Running argus daemon, if one was launched.
@ObservationIgnored private var argusProcess: Process?
/// Task that reads the daemon's stdout line by line.
@ObservationIgnored private var argusReadTask: Task<Void, Never>?
// NOTE(review): both paths below are absolute paths inside one developer's home directory.
// On any other machine the existence check in `startArgus` fails and the fallback capture
// loop is used instead. Consider making these configurable.
private let argusPythonPath = "/Users/joyzhuo/miniconda3/envs/gmr/bin/python3"
private let argusRepoPath = "/Users/joyzhuo/yhack/lockinbro-argus"
/// Private so that `shared` is the only way to obtain an instance.
private init() {}
// MARK: - Computed
/// The step at `currentStepIndex`, or `nil` when that index does not address an element
/// of `activeSteps` (empty list, index past the end, or a negative index).
var currentStep: Step? {
    // `currentStepIndex` is publicly settable, so reject negative values as well as
    // values past the end instead of trapping on the subscript.
    guard activeSteps.indices.contains(currentStepIndex) else { return nil }
    return activeSteps[currentStepIndex]
}
/// Number of steps in `activeSteps` whose `isDone` flag is set.
var completedCount: Int {
    activeSteps.reduce(into: 0) { tally, step in
        if step.isDone { tally += 1 }
    }
}
/// Number of steps loaded for the active task.
var totalSteps: Int {
    activeSteps.count
}
/// Seconds since `sessionStartDate`, or 0 when no start date is recorded.
var sessionElapsed: TimeInterval {
    if let startedAt = sessionStartDate {
        return Date().timeIntervalSince(startedAt)
    }
    return 0
}
// MARK: - Session Lifecycle
/// ID of the last started session, stored in `UserDefaults` so a stale session can
/// still be ended after an app restart. Assigning `nil` removes the stored value.
private var persistedSessionId: String? {
    get {
        UserDefaults.standard.string(forKey: "lockInBro.lastSessionId")
    }
    set {
        let defaults = UserDefaults.standard
        guard let sessionId = newValue else {
            defaults.removeObject(forKey: "lockInBro.lastSessionId")
            return
        }
        defaults.set(sessionId, forKey: "lockInBro.lastSessionId")
    }
}
/// Starts a focus session on the backend and, on success, begins local monitoring.
///
/// If the backend answers 409 (a session is already active), the known stale session —
/// the in-memory one, or the ID persisted across app restarts — is ended and the start
/// is retried once. Failures to create the session are reported through `errorMessage`.
///
/// - Parameter task: The task to focus on, or `nil` for a task-less session.
func startSession(task: AppTask?) async {
    isLoading = true
    errorMessage = nil
    defer { isLoading = false }

    do {
        let session: FocusSession
        do {
            session = try await APIClient.shared.startSession(taskId: task?.id)
        } catch NetworkError.httpError(409, _) {
            // End whichever session is active — prefer the locally known one,
            // fall back to the last persisted ID (survives app restarts).
            let staleId = activeSession?.id ?? persistedSessionId
            if let id = staleId {
                _ = try? await APIClient.shared.endSession(sessionId: id, status: "completed")
            }
            session = try await APIClient.shared.startSession(taskId: task?.id)
        }

        // The 409 path can replace a session that is still being monitored locally.
        // Tear the old monitors down so no subprocess, capture loop or observer is orphaned.
        stopArgus()
        stopCapture()
        stopAppObserver()

        activeSession = session
        persistedSessionId = session.id
        activeTask = task
        activeSteps = []
        currentStepIndex = 0
        isSessionActive = true
        sessionStartDate = Date()
        distractionCount = 0
        lastNudge = nil

        if let task {
            do {
                let steps = try await APIClient.shared.getSteps(taskId: task.id)
                activeSteps = steps.sorted { $0.sortOrder < $1.sortOrder }
                // Pick first in-progress or first pending step
                currentStepIndex = activeSteps.firstIndex(where: { $0.isActive })
                    ?? activeSteps.firstIndex(where: { $0.status == "pending" })
                    ?? 0
            } catch {
                // The session already exists at this point, so a failed step fetch is
                // reported but must not leave the session running without monitoring.
                errorMessage = error.localizedDescription
            }
        }

        screenshotHistory = []
        await requestNotificationPermission()
        startArgus(session: session, task: task)
        startAppObserver()
    } catch {
        errorMessage = error.localizedDescription
    }
}
/// Stops all monitoring, tells the backend the session has ended and clears local state.
///
/// The backend call is best-effort: local state is cleared even if it fails. The
/// persisted session ID is only forgotten when the backend call succeeded (or there was
/// no session to end), so a later `startSession` can still end the stale session when
/// it hits a 409.
///
/// - Parameter status: Final status reported to the backend. Defaults to `"completed"`.
func endSession(status: String = "completed") async {
    stopArgus()
    stopCapture()
    stopAppObserver()

    var endedOnBackend = true
    if let session = activeSession {
        do {
            _ = try await APIClient.shared.endSession(sessionId: session.id, status: status)
        } catch {
            // Deliberately not surfaced to the user; keep the persisted ID for recovery.
            endedOnBackend = false
        }
    }

    activeSession = nil
    activeTask = nil
    activeSteps = []
    isSessionActive = false
    sessionStartDate = nil
    lastNudge = nil
    resumeCard = nil
    showingResumeCard = false
    proactiveCard = nil
    approvedActionLabel = nil
    screenshotHistory = []
    if endedOnBackend {
        persistedSessionId = nil
    }
}
/// Requests the resume card for the active session and flags it for display.
/// Does nothing without an active session; failures are reported via `errorMessage`.
func fetchResumeCard() async {
    guard let current = activeSession else { return }
    do {
        let payload = try await APIClient.shared.resumeSession(sessionId: current.id)
        resumeCard = payload.resumeCard
        showingResumeCard = true
    } catch let failure {
        errorMessage = failure.localizedDescription
    }
}
/// Marks the current step as completed on the backend, stores the updated step and
/// moves `currentStepIndex` to the first step that is still pending (if any).
/// Does nothing when there is no current step; failures are reported via `errorMessage`.
func completeCurrentStep() async {
    guard let inProgress = currentStep else { return }
    do {
        let finished = try await APIClient.shared.completeStep(stepId: inProgress.id)

        // Replace the local copy with the server's version.
        if let slot = activeSteps.firstIndex(where: { $0.id == finished.id }) {
            activeSteps[slot] = finished
        }

        // Advance to next pending
        let upcoming = activeSteps.firstIndex(where: { $0.status == "pending" })
        if let upcoming {
            currentStepIndex = upcoming
        }
    } catch let failure {
        errorMessage = failure.localizedDescription
    }
}
// MARK: - App Switch Observer
/// Records the current frontmost app as the baseline and starts listening for
/// application-activation notifications, forwarding each one to `handleAppSwitch`.
/// Safe to call repeatedly: an observer registered by an earlier call is removed first.
private func startAppObserver() {
    // Overwriting the token without unregistering would leak the old observer and make
    // every app switch be handled twice.
    if let existing = appSwitchObserver {
        NSWorkspace.shared.notificationCenter.removeObserver(existing)
        appSwitchObserver = nil
    }

    let current = NSWorkspace.shared.frontmostApplication
    lastApp = (current?.localizedName ?? "", current?.bundleIdentifier ?? "")
    lastAppEnteredAt = Date()
    appSwitches = []

    appSwitchObserver = NSWorkspace.shared.notificationCenter.addObserver(
        forName: NSWorkspace.didActivateApplicationNotification,
        object: nil,
        queue: .main
    ) { [weak self] notification in
        guard let app = notification.userInfo?[NSWorkspace.applicationUserInfoKey] as? NSRunningApplication
        else { return }
        // Hop onto the main actor explicitly; the observer block itself is not isolated.
        Task { @MainActor [weak self] in self?.handleAppSwitch(app: app) }
    }
}
/// Unregisters the app-activation observer (if any) and discards the recorded switches.
private func stopAppObserver() {
    defer { appSwitches = [] }
    guard let token = appSwitchObserver else { return }
    NSWorkspace.shared.notificationCenter.removeObserver(token)
    appSwitchObserver = nil
}
/// Handles activation of `app`: reports the previous app's dwell time to the backend,
/// records the switch, and raises a proactive card when a repetitive loop is detected.
/// Activations of the app that is already recorded as frontmost (same name) are ignored.
private func handleAppSwitch(app: NSRunningApplication) {
    let incomingName = app.localizedName ?? "Unknown"
    let incomingBundleId = app.bundleIdentifier ?? ""
    let switchedAt = Date()
    if incomingName == lastApp.name { return }

    // Log previous app's dwell time to backend (fire-and-forget)
    let outgoing = lastApp
    let dwellSeconds = max(1, Int(switchedAt.timeIntervalSince(lastAppEnteredAt)))
    if !outgoing.name.isEmpty, let session = activeSession {
        Task {
            _ = try? await APIClient.shared.appActivity(
                sessionId: session.id,
                appBundleId: outgoing.bundleId,
                appName: outgoing.name,
                durationSeconds: dwellSeconds
            )
        }
    }

    lastApp = (incomingName, incomingBundleId)
    lastAppEnteredAt = switchedAt

    // Keep only the 30 most recent switches.
    appSwitches.append((name: incomingName, bundleId: incomingBundleId, time: switchedAt))
    if appSwitches.count > 30 {
        appSwitches.removeFirst()
    }

    // Only trigger card during active session and when none is already showing
    guard isSessionActive, proactiveCard == nil, let loop = detectRepetitiveLoop() else { return }
    proactiveCard = ProactiveCard(source: .appSwitchLoop(apps: loop.apps, switchCount: loop.count))
}
/// Detects a back-and-forth pattern between exactly 2 apps within a 5-minute window.
/// Requires 3 full cycles (6 consecutive alternating switches) to avoid false positives.
///
/// - Returns: The two app names (sorted) and the cycle count `3`, or `nil` when the last
///   six switches inside the window do not strictly alternate between two apps.
private func detectRepetitiveLoop() -> (apps: [String], count: Int)? {
    let windowStart = Date().addingTimeInterval(-300)
    let recentNames = appSwitches.compactMap { entry -> String? in
        entry.time > windowStart ? entry.name : nil
    }
    guard recentNames.count >= 6 else { return nil }

    let tail = Array(recentNames.suffix(6))
    let distinct = Set(tail)
    guard distinct.count == 2 else { return nil }

    // Strictly alternating — no two consecutive identical app names
    let alternates = zip(tail, tail.dropFirst()).allSatisfy { $0 != $1 }
    guard alternates else { return nil }

    return (apps: distinct.sorted(), count: 3)
}
// MARK: - Argus Subprocess (device-side VLM)
/// Launch the argus Python daemon as a subprocess.
/// Argus captures screenshots itself, runs them through a local VLM (Ollama/Gemini),
/// posts results to the backend, and emits RESULT:{json} lines to stdout for Swift to consume.
/// Falls back to the internal `startCapture()` loop if the process cannot be launched.
///
/// - Parameters:
///   - session: The session whose ID is passed to the daemon.
///   - task: The active task, used for the title/goal arguments (`nil` for no task).
private func startArgus(session: FocusSession, task: AppTask?) {
    // Never leave an earlier daemon running unreferenced if this is called again.
    stopArgus()

    guard FileManager.default.fileExists(atPath: argusPythonPath),
          FileManager.default.fileExists(atPath: argusRepoPath) else {
        argusStatus = "⚠️ Argus not found — using fallback capture"
        startCapture()
        return
    }

    // Encode steps as JSON for --steps-json arg
    var stepsJSONString = "[]"
    if !activeSteps.isEmpty {
        let stepsArray: [[String: Any]] = activeSteps.map { step in
            var s: [String: Any] = [
                "id": step.id,
                "sort_order": step.sortOrder,
                "title": step.title,
                "status": step.status
            ]
            if let note = step.checkpointNote { s["checkpoint_note"] = note }
            return s
        }
        if let data = try? JSONSerialization.data(withJSONObject: stepsArray),
           let str = String(data: data, encoding: .utf8) {
            stepsJSONString = str
        }
    }

    let jwt = TokenStore.shared.token ?? ""
    let geminiKey = UserDefaults.standard.string(forKey: "geminiApiKey") ?? ""
    // NOTE(review): the JWT and the Gemini key are passed as command-line arguments, which
    // other local processes can read from the process list. Passing them through the
    // environment or stdin would be safer, but requires a matching change in argus.
    var arguments = [
        "-m", "argus",
        "--session-id", session.id,
        "--task-title", task?.title ?? "(no task)",
        "--task-goal", task?.description ?? "",
        "--steps-json", stepsJSONString,
        "--window-title", NSWorkspace.shared.frontmostApplication?.localizedName ?? "",
        "--vlm", "gemini",
        "--jwt", jwt,
        "--backend-url", "https://wahwa.com/api/v1",
        "--swift-ipc"
    ]
    if !geminiKey.isEmpty {
        arguments += ["--gemini-key", geminiKey]
    }

    let process = Process()
    process.executableURL = URL(fileURLWithPath: argusPythonPath)
    process.currentDirectoryURL = URL(fileURLWithPath: argusRepoPath)
    process.arguments = arguments

    // Pipe stdout for RESULT: lines. stderr is discarded rather than piped: nothing reads
    // it, and an unread pipe would block the daemon once its buffer fills up.
    let stdoutPipe = Pipe()
    process.standardOutput = stdoutPipe
    process.standardError = FileHandle.nullDevice

    do {
        // `run()` reports launch failures as a Swift error; the deprecated `launch()`
        // raises an Objective-C exception instead, which this `catch` could never see.
        try process.run()
    } catch {
        argusStatus = "⚠️ Argus failed to launch — using fallback capture"
        startCapture()
        return
    }

    argusProcess = process
    isCapturing = true
    let taskLabel = task?.title ?? "session"
    argusStatus = "🚀 Argus started — waiting for first screenshot…"
    sendDebugNotification(title: "🚀 Argus VLM Started", body: "Screen monitoring active for \(taskLabel)")

    // Read STATUS:/RESULT: lines from argus stdout in a background task
    let fileHandle = stdoutPipe.fileHandleForReading
    argusReadTask = Task { [weak self] in
        do {
            for try await line in fileHandle.bytes.lines {
                guard let self, !Task.isCancelled else { break }
                if line.hasPrefix("STATUS:") {
                    let event = String(line.dropFirst("STATUS:".count))
                    await MainActor.run { self.handleArgusStatus(event) }
                } else if line.hasPrefix("RESULT:") {
                    let jsonStr = String(line.dropFirst("RESULT:".count))
                    guard let data = jsonStr.data(using: .utf8) else { continue }
                    // Lines that do not decode are skipped rather than ending the stream.
                    if let result = try? JSONDecoder().decode(DistractionAnalysisResponse.self, from: data) {
                        await MainActor.run {
                            let summary = result.vlmSummary ?? "no summary"
                            self.argusStatus = "\(summary)"
                            self.sendDebugNotification(title: "✅ VLM Result", body: summary)
                            self.applyDistractionResult(result)
                        }
                    }
                }
            }
        } catch {
            // Pipe closed — argus process ended
        }
    }
}
/// Cancels the stdout reader and, if a daemon was launched, terminates it and clears
/// the capturing flag.
private func stopArgus() {
    argusReadTask?.cancel()
    argusReadTask = nil

    guard let daemon = argusProcess else { return }
    daemon.terminate()
    argusProcess = nil
    isCapturing = false
}
// MARK: - Screenshot Capture Loop (fallback when argus is unavailable)
/// Starts the fallback loop: one capture immediately, then one every `captureInterval`
/// seconds until the task is cancelled or the session is no longer active.
/// A loop started by an earlier call is cancelled first.
private func startCapture() {
    // Without this, calling twice would leave the earlier loop running with no handle
    // left to cancel it, doubling the capture rate.
    captureTask?.cancel()
    isCapturing = true
    let interval = captureInterval
    captureTask = Task { [weak self] in
        // Capture immediately on session start, then repeat on interval
        await self?.captureAndAnalyze()
        while !Task.isCancelled, self?.isSessionActive == true {
            // `self` is only held weakly across the sleep.
            try? await Task.sleep(for: .seconds(interval))
            guard !Task.isCancelled, let self, self.isSessionActive else { break }
            await self.captureAndAnalyze()
        }
    }
}
/// Cancels the fallback capture loop (if any) and clears the capturing flag.
private func stopCapture() {
    if let loop = captureTask {
        loop.cancel()
    }
    captureTask = nil
    isCapturing = false
}
/// Captures the screen, sends it to the backend for analysis together with the task
/// context and the rolling summary history, and applies the result.
/// Does nothing without an active session or when the capture fails. A result that
/// arrives after the session has ended or been replaced is discarded.
private func captureAndAnalyze() async {
    guard let session = activeSession else { return }
    guard let imageData = await captureScreen() else { return }

    let windowTitle = NSWorkspace.shared.frontmostApplication?.localizedName ?? "Unknown"
    var context = buildTaskContext()

    // Inject rolling history so the VLM has temporal context across captures.
    // Only summaries (text) are sent — not the raw images — to keep token cost low.
    if !screenshotHistory.isEmpty {
        let iso = ISO8601DateFormatter()
        context["screenshot_history"] = screenshotHistory.map { entry in
            ["summary": entry.summary, "timestamp": iso.string(from: entry.timestamp)]
        }
    }

    do {
        let result = try await APIClient.shared.analyzeScreenshot(
            imageData: imageData,
            windowTitle: windowTitle,
            sessionId: session.id,
            taskContext: context
        )

        // The session may have ended (or a new one started) while the request was in
        // flight; applying a stale result would nudge the user outside that session.
        guard activeSession?.id == session.id else { return }

        // Append this result's summary to the rolling buffer (max 4 entries)
        if let summary = result.vlmSummary {
            screenshotHistory.append(ScreenshotHistoryEntry(summary: summary, timestamp: Date()))
            if screenshotHistory.count > 4 { screenshotHistory.removeFirst() }
        }
        applyDistractionResult(result)
    } catch {
        // Silent fail — don't interrupt the user
    }
}
/// Takes a 1280×720 screenshot of the first shareable display and returns it as JPEG data.
///
/// - Returns: The encoded image, or `nil` when no display is available or any
///   ScreenCaptureKit call fails.
private func captureScreen() async -> Data? {
    guard let shareable = try? await SCShareableContent.current,
          let primaryDisplay = shareable.displays.first else {
        return nil
    }

    let settings = SCStreamConfiguration()
    settings.width = 1280
    settings.height = 720

    // Whole display, no windows excluded.
    let wholeDisplay = SCContentFilter(display: primaryDisplay, excludingWindows: [])
    guard let frame = try? await SCScreenshotManager.captureImage(
        contentFilter: wholeDisplay,
        configuration: settings
    ) else {
        return nil
    }
    return cgImageToJPEG(frame)
}
/// Encodes `image` as JPEG with a compression factor of 0.5.
///
/// The bitmap representation is created directly from the `CGImage`, avoiding the
/// intermediate `NSImage` and TIFF encode/decode on every capture.
///
/// - Parameter image: The captured frame.
/// - Returns: JPEG data, or `nil` if the representation could not be produced.
private func cgImageToJPEG(_ image: CGImage) -> Data? {
    NSBitmapImageRep(cgImage: image)
        .representation(using: .jpeg, properties: [.compressionFactor: 0.5])
}
/// Builds the task-context dictionary sent with each screenshot: task title, task goal
/// (falling back to the title when there is no description) and the encoded steps.
/// Returns an empty dictionary when there is no active task.
private func buildTaskContext() -> [String: Any] {
    guard let task = activeTask else { return [:] }

    var encodedSteps: [[String: Any]] = []
    for step in activeSteps {
        var fields: [String: Any] = [
            "id": step.id,
            "sort_order": step.sortOrder,
            "title": step.title,
            "status": step.status
        ]
        // The note key is only present when the step has one.
        if let note = step.checkpointNote {
            fields["checkpoint_note"] = note
        }
        encodedSteps.append(fields)
    }

    return [
        "task_title": task.title,
        "task_goal": task.description ?? task.title,
        "steps": encodedSteps
    ]
}
/// Applies one VLM analysis result to the session state.
///
/// Step side-effects (completed steps, checkpoint note, current step) are always
/// applied. After that, at most one user-facing reaction is chosen: actionable friction
/// (resume card or proactive card) takes precedence over a gentle nudge.
///
/// - Parameter result: The decoded analysis response.
private func applyDistractionResult(_ result: DistractionAnalysisResponse) {
    // 1. Apply step side-effects (always)
    for completedId in result.stepsCompleted {
        if let idx = activeSteps.firstIndex(where: { $0.id == completedId }) {
            activeSteps[idx].status = "done"
        }
    }
    if let stepId = result.currentStepId,
       let idx = activeSteps.firstIndex(where: { $0.id == stepId }) {
        if let note = result.checkpointNoteUpdate {
            activeSteps[idx].checkpointNote = note
        }
        currentStepIndex = idx
    }

    // 2. Notification priority (design spec §1.5):
    //    Proactive friction help → Context resume → Gentle nudge
    //    NEVER nudge when the system could help instead.
    if let friction = result.friction, friction.isActionable {
        if friction.isResumption {
            // Task resumption detected — auto-surface resume card without button press.
            // Skip while a card is already showing so that repeated results do not
            // trigger a new fetch on every analysis.
            if !showingResumeCard {
                Task { await fetchResumeCard() }
            }
        } else if proactiveCard == nil {
            proactiveCard = ProactiveCard(source: .vlmFriction(
                frictionType: friction.type,
                description: friction.description,
                actions: friction.proposedActions
            ))
        }
    } else if !result.onTask, result.confidence > 0.7, let nudge = result.gentleNudge {
        // Only nudge if VLM found no actionable friction
        distractionCount += 1
        lastNudge = nudge
        sendNudgeNotification(nudge)
    }
}
// MARK: - Notifications
/// Translates a `STATUS:` event name from argus into a status line and a debug
/// notification. Unrecognized events are ignored.
private func handleArgusStatus(_ event: String) {
    let update: (status: String, title: String, body: String)
    switch event {
    case "screenshot_captured":
        update = ("📸 Screenshot captured — sending to VLM…",
                  "📸 Screenshot Captured",
                  "Sending to VLM for analysis…")
    case "vlm_running":
        update = ("🤖 VLM analyzing screen…",
                  "🤖 VLM Running",
                  "Gemini is analyzing your screen…")
    case "vlm_done":
        update = ("🧠 VLM done — applying result…",
                  "🧠 VLM Done",
                  "Analysis complete, processing result…")
    default:
        return
    }
    argusStatus = update.status
    sendDebugNotification(title: update.title, body: update.body)
}
/// Posts an immediate, silent local notification used to trace the analysis pipeline.
/// Each request gets a unique `debug-` prefixed identifier.
private func sendDebugNotification(title: String, body: String) {
    let payload = UNMutableNotificationContent()
    payload.title = title
    payload.body = body

    let identifier = "debug-\(UUID().uuidString)"
    // A nil trigger delivers the notification right away.
    let request = UNNotificationRequest(identifier: identifier, content: payload, trigger: nil)
    UNUserNotificationCenter.current().add(request)
}
/// Posts an immediate local notification carrying the nudge text, with the default sound.
private func sendNudgeNotification(_ nudge: String) {
    let payload = UNMutableNotificationContent()
    payload.title = "Hey, quick check-in!"
    payload.body = nudge
    payload.sound = .default

    // A nil trigger delivers the notification right away.
    let request = UNNotificationRequest(identifier: UUID().uuidString, content: payload, trigger: nil)
    UNUserNotificationCenter.current().add(request)
}
/// Asks for permission to show alerts and play sounds. The outcome (granted, denied or
/// error) is intentionally ignored.
private func requestNotificationPermission() async {
    let center = UNUserNotificationCenter.current()
    _ = try? await center.requestAuthorization(options: [.alert, .sound])
}
}