// HistoryBuffer.swift — Two-tier rolling history for VLM temporal context
// Ports Python argus buffer.py: image tier (recent frames) + text tier (older summaries).
// The VLM sees recent images directly AND gets text context for events 30-60s ago.

import Foundation

/// A single buffered screenshot frame with its VLM summary.
struct BufferEntry: Sendable {
    let imageData: Data   // JPEG bytes
    var summary: String   // VLM-generated summary (populated after analysis)
    let timestamp: Date
    var fileUri: String?  // Gemini Files API URI (set async after upload; nil = use inline)
}

/// A text-only summary from an older analysis (images already evicted).
struct TextEntry: Sendable {
    let summary: String
    let timestamp: Date
}

/// Two-tier rolling buffer that provides temporal context to the VLM.
///
/// - **Image tier:** Last N frames (JPEG + summary + timestamp). Sent as images.
/// - **Text tier:** Older summaries that rolled off the image buffer. Sent as text.
/// - **Last output:** Previous VLM JSON result for self-refinement.
/// - **Last execution:** Executor action summary to prevent re-flagging.
///
/// Only accessed from `SessionManager` on the main actor — no concurrent access.
@MainActor
final class HistoryBuffer {
    private let imageMaxLen: Int
    private let textMaxLen: Int
    /// Number of VLM iterations after which `lastExecution` auto-expires.
    private let executionTTL: Int

    /// Recent frames — sent as images to the VLM.
    private(set) var images: [BufferEntry] = []
    /// Older summaries — sent as text context.
    private(set) var textHistory: [TextEntry] = []
    /// Full VLM JSON output from last analysis (for self-refinement).
    private(set) var lastOutput: String = ""
    /// Summary of last executor action (prevents re-flagging same friction).
    private(set) var lastExecution: String = ""
    /// Counter for how many VLM calls since execution was set (cleared after `executionTTL`).
    private var executionAge: Int = 0

    /// Creates a buffer with the given tier capacities.
    /// - Parameters:
    ///   - imageMaxLen: Maximum frames retained in the image tier.
    ///   - textMaxLen: Maximum summaries retained in the text tier.
    ///   - executionTTL: VLM iterations before the execution note expires
    ///     (previously hard-coded to 3; default preserves that behavior).
    init(imageMaxLen: Int = 4, textMaxLen: Int = 12, executionTTL: Int = 3) {
        self.imageMaxLen = imageMaxLen
        self.textMaxLen = textMaxLen
        self.executionTTL = executionTTL
    }

    // MARK: - Push / Update

    /// Add a new frame to the image buffer. If the buffer is full, the oldest
    /// frame's summary is promoted to the text tier before eviction.
    func push(imageData: Data, summary: String = "") {
        let entry = BufferEntry(imageData: imageData, summary: summary, timestamp: Date())
        if images.count >= imageMaxLen {
            // Promote oldest image's summary to text tier (if non-empty)
            let evicted = images.removeFirst()
            if !evicted.summary.isEmpty {
                textHistory.append(TextEntry(summary: evicted.summary, timestamp: evicted.timestamp))
                if textHistory.count > textMaxLen {
                    textHistory.removeFirst()
                }
            }
        }
        images.append(entry)
    }

    /// Update the summary on the most recent image entry (called after VLM returns).
    func updateLastSummary(_ summary: String) {
        guard !images.isEmpty else { return }
        images[images.count - 1].summary = summary
    }

    /// Store the Gemini Files API URI for the frame with the given timestamp.
    /// Called asynchronously after upload completes — the 1s tolerance is safe
    /// because pushes happen at 5s intervals, so at most one frame can match.
    func updateFileUri(_ uri: String, forTimestamp ts: Date) {
        guard let idx = images.firstIndex(where: { abs($0.timestamp.timeIntervalSince(ts)) < 1.0 }) else {
            return
        }
        images[idx].fileUri = uri
    }

    /// Store the full VLM JSON output for self-refinement on the next call.
    func setLastOutput(_ json: String) {
        lastOutput = json
    }

    /// Store executor action summary. Cleared automatically after `executionTTL` VLM iterations.
    func setLastExecution(_ summary: String) {
        lastExecution = summary
        executionAge = 0
    }

    /// Tick execution age — call after each VLM analysis. Clears after `executionTTL` ticks.
    func tickExecutionAge() {
        if !lastExecution.isEmpty {
            executionAge += 1
            if executionAge >= executionTTL {
                lastExecution = ""
                executionAge = 0
            }
        }
    }

    /// Get all buffered JPEG frames (for sending to VLM as images).
    var frameData: [Data] {
        images.map(\.imageData)
    }

    /// File URIs parallel to frameData — nil means fall back to inline base64 for that frame.
    var fileUris: [String?] {
        images.map(\.fileUri)
    }

    /// Get recent summaries as strings (for recentSummaries parameter).
    var recentSummaries: [String] {
        images.compactMap { $0.summary.isEmpty ? nil : $0.summary }
    }

    /// Clear all state (e.g., on session end).
    func clear() {
        images.removeAll()
        textHistory.removeAll()
        lastOutput = ""
        lastExecution = ""
        executionAge = 0
    }

    // MARK: - Prompt Formatting

    /// Build the temporal context section for the VLM prompt.
    /// Returns a formatted string with older text context + image labels.
    func formatForPrompt() -> String {
        var lines: [String] = []
        // Single snapshot so every age in one prompt is relative to the same instant
        // (previously Date() was re-evaluated per entry, letting ages drift mid-prompt).
        let now = Date()

        // Older text-only context (no images — just summaries)
        if !textHistory.isEmpty {
            lines.append("Older context (text only, no images):")
            for entry in textHistory {
                let age = Int(now.timeIntervalSince(entry.timestamp))
                lines.append("  - [\(age)s ago] \(entry.summary)")
            }
            lines.append("")
        }

        // Recent image labels (these accompany the actual images sent to the VLM)
        if !images.isEmpty {
            let total = images.count
            lines.append("Recent screenshots (\(total) frames, newest last):")
            for (i, entry) in images.enumerated() {
                let age = Int(now.timeIntervalSince(entry.timestamp))
                let isCurrent = (i == images.count - 1)
                let label = "  - Screenshot \(i + 1)/\(total): [\(isCurrent ? "now" : "\(age)s ago")]"
                if !entry.summary.isEmpty {
                    lines.append("\(label) \(entry.summary)")
                } else {
                    lines.append(label)
                }
            }
        }

        return lines.isEmpty ? "(no previous context)" : lines.joined(separator: "\n")
    }

    /// Format the last VLM output for self-refinement injection into the prompt.
    func formatLastOutput() -> String {
        guard !lastOutput.isEmpty else { return "" }
        return """
        Your previous analysis (refine or correct this based on new evidence):
        \(lastOutput)
        If your previous analysis was wrong or incomplete, correct it now. If it was accurate, build on it.
        """
    }

    /// Format execution context for injection into the prompt.
    func formatLastExecution() -> String {
        guard !lastExecution.isEmpty else { return "" }
        return """
        IMPORTANT — An AI agent just completed an action for the user:
        \(lastExecution)
        This task is DONE. Do not re-flag the same friction. Look for what the user does NEXT.
        """
    }
}