AR version pre-test

2026-02-14 23:10:53 -06:00
parent a46264c1f5
commit f7f14b2c5d
2 changed files with 220 additions and 292 deletions

ARVisionService.swift

@@ -0,0 +1,220 @@
//
// ARVisionService.swift
// SousChefAI
//
// AR-based vision service using RealityKit and ARKit
// Provides real-time plane detection and raycasting capabilities
//
import Foundation
import SwiftUI
import RealityKit
import ARKit
@preconcurrency import CoreVideo
/// AR-based implementation for vision and spatial scanning
final class ARVisionService: VisionService, @unchecked Sendable {
// MARK: - VisionService Protocol Implementation
func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
// Mock implementation - in a real app, this would use ML models
// to detect ingredients from AR camera frames
var detectedIngredients: [Ingredient] = []
var frameCount = 0
for await pixelBuffer in stream {
frameCount += 1
// Process every 30th frame to reduce processing load
if frameCount % 30 == 0 {
let ingredients = try await processARFrame(pixelBuffer)
// Merge results
for ingredient in ingredients {
if !detectedIngredients.contains(where: { $0.name == ingredient.name }) {
detectedIngredients.append(ingredient)
}
}
// Stop after collecting enough ingredients
if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
break
}
}
}
return detectedIngredients
.filter { $0.confidence >= AppConfig.minConfidenceThreshold }
.sorted { $0.confidence > $1.confidence }
}
func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
return try await processARFrame(pixelBuffer)
}
func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
// Mock implementation for cooking progress monitoring
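// The stream and step parameters are intentionally unused in this stub; a real
// implementation would sample frames and evaluate them against the current step.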
return CookingProgress(
isComplete: false,
confidence: 0.5,
feedback: "Monitoring cooking progress..."
)
}
// MARK: - Private Helper Methods
private func processARFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
// Mock ingredient detection
// In a real implementation, this would use Vision framework or ML models
// to detect objects in the AR camera feed
// For now, return empty array - actual detection would happen here
return []
}
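// MARK: - Illustrative Sketch (not part of the original implementation)
// A minimal sketch of how processARFrame might perform real detection using the
// built-in Vision classifier instead of returning an empty array. Assumes an
// `import Vision` at the top of the file; the helper name and the naive
// label-to-ingredient mapping are hypothetical.
private func classifyFrameSketch(_ pixelBuffer: CVPixelBuffer) throws -> [Ingredient] {
    let request = VNClassifyImageRequest()
    let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
    try handler.perform([request])
    let observations = request.results ?? []
    // Keep reasonably confident classifications and map them directly to ingredients.
    return observations
        .filter { $0.confidence >= 0.3 }
        .prefix(5)
        .map { Ingredient(name: $0.identifier,
                          estimatedQuantity: "Unknown",
                          confidence: Double($0.confidence)) }
}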
}
/// SwiftUI wrapper for ARView with plane detection and raycasting
struct ARViewContainer: UIViewRepresentable {
@Binding var detectedPlanes: Int
@Binding var lastRaycastResult: String
func makeUIView(context: Context) -> ARView {
let arView = ARView(frame: .zero)
// Configure AR session
let configuration = ARWorldTrackingConfiguration()
// Enable plane detection for horizontal and vertical surfaces
configuration.planeDetection = [.horizontal, .vertical]
// Enable scene reconstruction for better spatial understanding
if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
configuration.sceneReconstruction = .mesh
}
// Enable debug options to visualize the reconstructed scene mesh and the world origin
arView.debugOptions = [.showSceneUnderstanding, .showWorldOrigin]
// Set the coordinator as the session delegate
arView.session.delegate = context.coordinator
// Run the AR session
arView.session.run(configuration)
// Add tap gesture for raycasting
let tapGesture = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap(_:)))
arView.addGestureRecognizer(tapGesture)
context.coordinator.arView = arView
return arView
}
func updateUIView(_ uiView: ARView, context: Context) {
// Update UI if needed
}
func makeCoordinator() -> Coordinator {
Coordinator(detectedPlanes: $detectedPlanes, lastRaycastResult: $lastRaycastResult)
}
// MARK: - Coordinator
class Coordinator: NSObject, ARSessionDelegate {
@Binding var detectedPlanes: Int
@Binding var lastRaycastResult: String
weak var arView: ARView?
private var detectedPlaneAnchors: Set<UUID> = []
init(detectedPlanes: Binding<Int>, lastRaycastResult: Binding<String>) {
_detectedPlanes = detectedPlanes
_lastRaycastResult = lastRaycastResult
}
// MARK: - ARSessionDelegate Methods
func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
for anchor in anchors {
if let planeAnchor = anchor as? ARPlaneAnchor {
detectedPlaneAnchors.insert(planeAnchor.identifier)
DispatchQueue.main.async {
self.detectedPlanes = self.detectedPlaneAnchors.count
}
}
}
}
func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
// Plane anchors are refined over time as ARKit improves its understanding; no action needed here
}
func session(_ session: ARSession, didRemove anchors: [ARAnchor]) {
for anchor in anchors {
if let planeAnchor = anchor as? ARPlaneAnchor {
detectedPlaneAnchors.remove(planeAnchor.identifier)
DispatchQueue.main.async {
self.detectedPlanes = self.detectedPlaneAnchors.count
}
}
}
}
func session(_ session: ARSession, didFailWithError error: Error) {
print("AR Session failed: \(error.localizedDescription)")
}
// MARK: - Raycasting
/// Performs a raycast from the given screen point to detect planes
func performRaycast(from point: CGPoint, in view: ARView) -> ARRaycastResult? {
// Create raycast query targeting estimated planes
guard let query = view.makeRaycastQuery(
from: point,
allowing: .estimatedPlane,
alignment: .any
) else {
return nil
}
// Perform the raycast
let results = view.session.raycast(query)
return results.first
}
@objc func handleTap(_ gesture: UITapGestureRecognizer) {
guard let arView = arView else { return }
let location = gesture.location(in: arView)
if let result = performRaycast(from: location, in: arView) {
let position = result.worldTransform.columns.3
let resultString = String(format: "Hit at: (%.2f, %.2f, %.2f)", position.x, position.y, position.z)
DispatchQueue.main.async {
self.lastRaycastResult = resultString
}
// Place a visual marker at the hit location
placeMarker(at: result.worldTransform, in: arView)
} else {
DispatchQueue.main.async {
self.lastRaycastResult = "No surface detected"
}
}
}
private func placeMarker(at transform: simd_float4x4, in arView: ARView) {
// Create a small sphere to visualize the raycast hit
let sphere = MeshResource.generateSphere(radius: 0.02)
let material = SimpleMaterial(color: .green, isMetallic: false)
let modelEntity = ModelEntity(mesh: sphere, materials: [material])
// Create an anchor at the hit position
let anchorEntity = AnchorEntity(world: transform)
anchorEntity.addChild(modelEntity)
arView.scene.addAnchor(anchorEntity)
}
}
}
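// MARK: - Usage Sketch (illustrative only, not part of the original commit)
// A minimal example of how ARViewContainer might be hosted from SwiftUI. The
// `ARScanScreen` view name and the overlay layout are hypothetical.
struct ARScanScreen: View {
    @State private var detectedPlanes = 0
    @State private var lastRaycastResult = "Tap a surface to raycast"

    var body: some View {
        ZStack(alignment: .bottom) {
            ARViewContainer(detectedPlanes: $detectedPlanes,
                            lastRaycastResult: $lastRaycastResult)
                .ignoresSafeArea()
            VStack(spacing: 4) {
                Text("Planes detected: \(detectedPlanes)")
                Text(lastRaycastResult)
            }
            .padding()
            .background(.ultraThinMaterial, in: RoundedRectangle(cornerRadius: 12))
            .padding(.bottom, 24)
        }
    }
}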

OvershootVisionService.swift

@@ -1,292 +0,0 @@
//
// OvershootVisionService.swift
// SousChefAI
//
// Concrete implementation of VisionService using Overshoot API
// Provides low-latency real-time video inference for ingredient detection
//
import Foundation
@preconcurrency import CoreVideo
import UIKit
import CoreImage
/// Overshoot API implementation for vision-based ingredient detection
final class OvershootVisionService: VisionService, @unchecked Sendable {
private let apiKey: String
private let webSocketURL: URL
private var webSocketTask: URLSessionWebSocketTask?
private let session: URLSession
nonisolated init(apiKey: String = AppConfig.overshootAPIKey,
webSocketURL: String = AppConfig.overshootWebSocketURL) {
self.apiKey = apiKey
guard let url = URL(string: webSocketURL) else {
fatalError("Invalid WebSocket URL: \(webSocketURL)")
}
self.webSocketURL = url
let config = URLSessionConfiguration.default
config.timeoutIntervalForRequest = 30
self.session = URLSession(configuration: config)
}
// MARK: - VisionService Protocol Implementation
func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
guard apiKey != "INSERT_KEY_HERE" else {
throw VisionServiceError.apiKeyMissing
}
// Connect to WebSocket
try await connectWebSocket()
var detectedIngredients: [String: Ingredient] = [:]
// Process frames from stream
for await pixelBuffer in stream {
do {
let frameIngredients = try await processFrame(pixelBuffer)
// Merge results (keep highest confidence for each ingredient)
for ingredient in frameIngredients {
if let existing = detectedIngredients[ingredient.name] {
if ingredient.confidence > existing.confidence {
detectedIngredients[ingredient.name] = ingredient
}
} else {
detectedIngredients[ingredient.name] = ingredient
}
}
// Limit to max ingredients
if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
break
}
} catch {
print("Error processing frame: \(error)")
continue
}
}
disconnectWebSocket()
return Array(detectedIngredients.values)
.filter { $0.confidence >= AppConfig.minConfidenceThreshold }
.sorted { $0.confidence > $1.confidence }
}
func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
guard apiKey != "INSERT_KEY_HERE" else {
throw VisionServiceError.apiKeyMissing
}
// For single frame, use REST API instead of WebSocket
return try await detectIngredientsViaREST(pixelBuffer)
}
func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
guard apiKey != "INSERT_KEY_HERE" else {
throw VisionServiceError.apiKeyMissing
}
// Connect to WebSocket for real-time monitoring
try await connectWebSocket()
var latestProgress = CookingProgress(isComplete: false, confidence: 0.0, feedback: "Analyzing...")
// Monitor frames for cooking completion
for await pixelBuffer in stream {
do {
let progress = try await analyzeCookingFrame(pixelBuffer, step: step)
latestProgress = progress
if progress.isComplete && progress.confidence > 0.8 {
disconnectWebSocket()
return progress
}
} catch {
print("Error analyzing cooking frame: \(error)")
continue
}
}
disconnectWebSocket()
return latestProgress
}
// MARK: - Private Helper Methods
private func connectWebSocket() async throws {
var request = URLRequest(url: webSocketURL)
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
request.setValue("application/json", forHTTPHeaderField: "Content-Type")
webSocketTask = session.webSocketTask(with: request)
webSocketTask?.resume()
// Brief fixed delay to let the connection establish (see the ping-based verification sketch below)
try await Task.sleep(for: .milliseconds(500))
}
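// Sketch (not part of the original implementation): instead of a fixed delay,
// connection readiness could be verified with a ping/pong round trip before
// sending frames. The helper name is hypothetical.
private func verifyConnectionSketch() async throws {
    guard let task = webSocketTask else { throw VisionServiceError.invalidResponse }
    try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
        task.sendPing { error in
            if let error {
                continuation.resume(throwing: error)
            } else {
                continuation.resume()
            }
        }
    }
}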
private func disconnectWebSocket() {
webSocketTask?.cancel(with: .goingAway, reason: nil)
webSocketTask = nil
}
private func processFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
// Convert pixel buffer to JPEG data
guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
throw VisionServiceError.invalidResponse
}
// Create WebSocket message
let message = OvershootRequest(
type: "detect_ingredients",
image: imageData.base64EncodedString(),
timestamp: Date().timeIntervalSince1970
)
// Send frame via WebSocket
let messageData = try JSONEncoder().encode(message)
let messageString = String(data: messageData, encoding: .utf8)!
try await webSocketTask?.send(.string(messageString))
// Receive response
guard let response = try await receiveWebSocketMessage() else {
return []
}
return parseIngredients(from: response)
}
private func analyzeCookingFrame(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
throw VisionServiceError.invalidResponse
}
let message = OvershootRequest(
type: "analyze_cooking",
image: imageData.base64EncodedString(),
timestamp: Date().timeIntervalSince1970,
context: step
)
let messageData = try JSONEncoder().encode(message)
let messageString = String(data: messageData, encoding: .utf8)!
try await webSocketTask?.send(.string(messageString))
guard let response = try await receiveWebSocketMessage() else {
return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No response")
}
return parseCookingProgress(from: response)
}
private func receiveWebSocketMessage() async throws -> OvershootResponse? {
guard let message = try await webSocketTask?.receive() else {
return nil
}
switch message {
case .string(let text):
guard let data = text.data(using: .utf8) else { return nil }
return try? JSONDecoder().decode(OvershootResponse.self, from: data)
case .data(let data):
return try? JSONDecoder().decode(OvershootResponse.self, from: data)
@unknown default:
return nil
}
}
private func detectIngredientsViaREST(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
// Fallback REST API implementation
// This would be used for single-frame detection
guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
throw VisionServiceError.invalidResponse
}
var request = URLRequest(url: URL(string: "https://api.overshoot.ai/v1/detect")!)
request.httpMethod = "POST"
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
request.setValue("application/json", forHTTPHeaderField: "Content-Type")
let requestBody = OvershootRequest(
type: "detect_ingredients",
image: imageData.base64EncodedString(),
timestamp: Date().timeIntervalSince1970
)
request.httpBody = try JSONEncoder().encode(requestBody)
let (data, _) = try await session.data(for: request)
let response = try JSONDecoder().decode(OvershootResponse.self, from: data)
return parseIngredients(from: response)
}
private func parseIngredients(from response: OvershootResponse) -> [Ingredient] {
guard let detections = response.detections else { return [] }
return detections.map { detection in
Ingredient(
name: detection.label,
estimatedQuantity: detection.quantity ?? "Unknown",
confidence: detection.confidence
)
}
}
private func parseCookingProgress(from response: OvershootResponse) -> CookingProgress {
CookingProgress(
isComplete: response.isComplete ?? false,
confidence: response.confidence ?? 0.0,
feedback: response.feedback ?? "Processing..."
)
}
private func pixelBufferToJPEG(_ pixelBuffer: CVPixelBuffer) -> Data? {
let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
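// Note: creating a new CIContext for every frame is relatively expensive; a production
// implementation would likely cache a single context as a stored property.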
let context = CIContext()
guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
return nil
}
let uiImage = UIImage(cgImage: cgImage)
return uiImage.jpegData(compressionQuality: 0.8)
}
}
// MARK: - Overshoot API Models
private struct OvershootRequest: Codable {
let type: String
let image: String
let timestamp: TimeInterval
var context: String?
}
private struct OvershootResponse: Codable {
let detections: [Detection]?
let isComplete: Bool?
let confidence: Double?
let feedback: String?
struct Detection: Codable {
let label: String
let confidence: Double
let quantity: String?
let boundingBox: BoundingBox?
}
struct BoundingBox: Codable {
let x: Double
let y: Double
let width: Double
let height: Double
}
}
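// Illustrative example (hypothetical payload, not taken from Overshoot documentation):
// a response shaped like the following would decode into OvershootResponse above.
//
// {
//   "detections": [
//     { "label": "tomato", "confidence": 0.92, "quantity": "2 medium",
//       "boundingBox": { "x": 0.12, "y": 0.30, "width": 0.25, "height": 0.22 } }
//   ]
// }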