From f7f14b2c5dcc544fca75adebcadef9e2f652f6d1 Mon Sep 17 00:00:00 2001
From: pulipakaa24
Date: Sat, 14 Feb 2026 23:10:53 -0600
Subject: [PATCH] AR version pre-test

---
Review notes (not part of the commit message):
- Line breaks, indentation, and generic arguments (AsyncStream<CVPixelBuffer>,
  Set<UUID>, Binding<Int>, Binding<String>) were reconstructed from a flattened
  copy of this patch; confirm the removal hunk against the working tree.
- ARVisionService now keeps the highest-confidence detection per ingredient,
  and ARViewContainer pauses its ARSession in dismantleUIView.

 SousChefAI/Services/ARVisionService.swift     | 246 +++++++++++++++
 .../Services/OvershootVisionService.swift     | 292 ------------------
 2 files changed, 246 insertions(+), 292 deletions(-)
 create mode 100644 SousChefAI/Services/ARVisionService.swift
 delete mode 100644 SousChefAI/Services/OvershootVisionService.swift

diff --git a/SousChefAI/Services/ARVisionService.swift b/SousChefAI/Services/ARVisionService.swift
new file mode 100644
index 0000000..a1af8e7
--- /dev/null
+++ b/SousChefAI/Services/ARVisionService.swift
@@ -0,0 +1,246 @@
+//
+//  ARVisionService.swift
+//  SousChefAI
+//
+//  AR-based vision service using RealityKit and ARKit
+//  Provides real-time plane detection and raycasting capabilities
+//
+
+import Foundation
+import SwiftUI
+import RealityKit
+import ARKit
+@preconcurrency import CoreVideo
+
+/// AR-based implementation for vision and spatial scanning
+final class ARVisionService: VisionService, @unchecked Sendable {
+
+    // MARK: - VisionService Protocol Implementation
+
+    /// Collects ingredients from a stream of camera frames.
+    ///
+    /// Only every 30th frame is processed. Detections are merged by name, keeping
+    /// the highest-confidence entry, and collection stops once
+    /// `AppConfig.maxIngredientsPerScan` distinct names have been gathered.
+    ///
+    /// - Parameter stream: Camera frames to scan.
+    /// - Returns: Detections at or above `AppConfig.minConfidenceThreshold`,
+    ///   sorted by descending confidence.
+    /// - Throws: Any error thrown while processing a frame.
+    func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
+        // Mock implementation - in a real app, this would use ML models
+        // to detect ingredients from AR camera frames
+        var detectedIngredients: [Ingredient] = []
+        var frameCount = 0
+
+        for await pixelBuffer in stream {
+            frameCount += 1
+
+            // Process every 30th frame to reduce processing load
+            if frameCount % 30 == 0 {
+                let ingredients = try await processARFrame(pixelBuffer)
+
+                // Merge results (keep highest confidence for each ingredient) so a
+                // weak first sighting cannot cause a later, stronger detection of
+                // the same ingredient to be dropped by the confidence filter below
+                for ingredient in ingredients {
+                    if let index = detectedIngredients.firstIndex(where: { $0.name == ingredient.name }) {
+                        if ingredient.confidence > detectedIngredients[index].confidence {
+                            detectedIngredients[index] = ingredient
+                        }
+                    } else {
+                        detectedIngredients.append(ingredient)
+                    }
+                }
+
+                // Stop after collecting enough ingredients
+                if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
+                    break
+                }
+            }
+        }
+
+        return detectedIngredients
+            .filter { $0.confidence >= AppConfig.minConfidenceThreshold }
+            .sorted { $0.confidence > $1.confidence }
+    }
+
+    /// Detects ingredients in a single camera frame.
+    func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
+        return try await processARFrame(pixelBuffer)
+    }
+
+    /// Mock implementation: ignores `stream` and `step` and always reports an
+    /// incomplete step with a fixed confidence of 0.5.
+    func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
+        // Mock implementation for cooking progress monitoring
+        return CookingProgress(
+            isComplete: false,
+            confidence: 0.5,
+            feedback: "Monitoring cooking progress..."
+        )
+    }
+
+    // MARK: - Private Helper Methods
+
+    private func processARFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
+        // Mock ingredient detection
+        // In a real implementation, this would use Vision framework or ML models
+        // to detect objects in the AR camera feed
+
+        // For now, return empty array - actual detection would happen here
+        return []
+    }
+}
+
+/// SwiftUI wrapper for ARView with plane detection and raycasting
+struct ARViewContainer: UIViewRepresentable {
+    @Binding var detectedPlanes: Int
+    @Binding var lastRaycastResult: String
+
+    func makeUIView(context: Context) -> ARView {
+        let arView = ARView(frame: .zero)
+
+        // Configure AR session
+        let configuration = ARWorldTrackingConfiguration()
+
+        // Enable plane detection for horizontal and vertical surfaces
+        configuration.planeDetection = [.horizontal, .vertical]
+
+        // Enable scene reconstruction for better spatial understanding
+        if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
+            configuration.sceneReconstruction = .mesh
+        }
+
+        // Enable debug options to visualize detected planes
+        arView.debugOptions = [.showSceneUnderstanding, .showWorldOrigin]
+
+        // Set the coordinator as the session delegate
+        arView.session.delegate = context.coordinator
+
+        // Run the AR session
+        arView.session.run(configuration)
+
+        // Add tap gesture for raycasting
+        let tapGesture = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap(_:)))
+        arView.addGestureRecognizer(tapGesture)
+
+        context.coordinator.arView = arView
+
+        return arView
+    }
+
+    func updateUIView(_ uiView: ARView, context: Context) {
+        // Update UI if needed
+    }
+
+    /// Pauses the AR session when SwiftUI removes the view; nothing else in this
+    /// file stops the session started in `makeUIView(context:)`.
+    static func dismantleUIView(_ uiView: ARView, coordinator: Coordinator) {
+        uiView.session.pause()
+    }
+
+    func makeCoordinator() -> Coordinator {
+        Coordinator(detectedPlanes: $detectedPlanes, lastRaycastResult: $lastRaycastResult)
+    }
+
+    // MARK: - Coordinator
+
+    class Coordinator: NSObject, ARSessionDelegate {
+        @Binding var detectedPlanes: Int
+        @Binding var lastRaycastResult: String
+        weak var arView: ARView?
+        private var detectedPlaneAnchors: Set<UUID> = []
+
+        init(detectedPlanes: Binding<Int>, lastRaycastResult: Binding<String>) {
+            _detectedPlanes = detectedPlanes
+            _lastRaycastResult = lastRaycastResult
+        }
+
+        // MARK: - ARSessionDelegate Methods
+
+        func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
+            for anchor in anchors {
+                if let planeAnchor = anchor as? ARPlaneAnchor {
+                    detectedPlaneAnchors.insert(planeAnchor.identifier)
+                    DispatchQueue.main.async {
+                        self.detectedPlanes = self.detectedPlaneAnchors.count
+                    }
+                }
+            }
+        }
+
+        func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
+            // Planes are being updated as AR refines understanding
+        }
+
+        func session(_ session: ARSession, didRemove anchors: [ARAnchor]) {
+            for anchor in anchors {
+                if let planeAnchor = anchor as? ARPlaneAnchor {
+                    detectedPlaneAnchors.remove(planeAnchor.identifier)
+                    DispatchQueue.main.async {
+                        self.detectedPlanes = self.detectedPlaneAnchors.count
+                    }
+                }
+            }
+        }
+
+        func session(_ session: ARSession, didFailWithError error: Error) {
+            print("AR Session failed: \(error.localizedDescription)")
+        }
+
+        // MARK: - Raycasting
+
+        /// Performs a raycast from the given screen point against estimated planes
+        /// of any alignment and returns the first hit, or `nil` if there is none.
+        func performRaycast(from point: CGPoint, in view: ARView) -> ARRaycastResult? {
+            // Create raycast query targeting estimated planes
+            guard let query = view.makeRaycastQuery(
+                from: point,
+                allowing: .estimatedPlane,
+                alignment: .any
+            ) else {
+                return nil
+            }
+
+            // Perform the raycast
+            let results = view.session.raycast(query)
+            return results.first
+        }
+
+        @objc func handleTap(_ gesture: UITapGestureRecognizer) {
+            guard let arView = arView else { return }
+
+            let location = gesture.location(in: arView)
+
+            if let result = performRaycast(from: location, in: arView) {
+                let position = result.worldTransform.columns.3
+                let resultString = String(format: "Hit at: (%.2f, %.2f, %.2f)", position.x, position.y, position.z)
+
+                DispatchQueue.main.async {
+                    self.lastRaycastResult = resultString
+                }
+
+                // Place a visual marker at the hit location
+                placeMarker(at: result.worldTransform, in: arView)
+            } else {
+                DispatchQueue.main.async {
+                    self.lastRaycastResult = "No surface detected"
+                }
+            }
+        }
+
+        private func placeMarker(at transform: simd_float4x4, in arView: ARView) {
+            // Create a small sphere to visualize the raycast hit
+            let sphere = MeshResource.generateSphere(radius: 0.02)
+            let material = SimpleMaterial(color: .green, isMetallic: false)
+            let modelEntity = ModelEntity(mesh: sphere, materials: [material])
+
+            // Create an anchor at the hit position
+            let anchorEntity = AnchorEntity(world: transform)
+            anchorEntity.addChild(modelEntity)
+
+            arView.scene.addAnchor(anchorEntity)
+        }
+    }
+}
diff --git a/SousChefAI/Services/OvershootVisionService.swift b/SousChefAI/Services/OvershootVisionService.swift
deleted file mode 100644
index 9e75c1f..0000000
--- a/SousChefAI/Services/OvershootVisionService.swift
+++ /dev/null
@@ -1,292 +0,0 @@
-//
-//  OvershootVisionService.swift
-//  SousChefAI
-//
-//  Concrete implementation of VisionService using Overshoot API
-//  Provides low-latency real-time video inference for ingredient detection
-//
-
-import Foundation
-@preconcurrency import CoreVideo
-import UIKit
-
-/// Overshoot API implementation for vision-based ingredient detection
-final class OvershootVisionService: VisionService, @unchecked Sendable {
-
-    private let apiKey: String
-    private let webSocketURL: URL
-    private var webSocketTask: URLSessionWebSocketTask?
-    private let session: URLSession
-
-    nonisolated init(apiKey: String = AppConfig.overshootAPIKey,
-                     webSocketURL: String = AppConfig.overshootWebSocketURL) {
-        self.apiKey = apiKey
-        guard let url = URL(string: webSocketURL) else {
-            fatalError("Invalid WebSocket URL: \(webSocketURL)")
-        }
-        self.webSocketURL = url
-
-        let config = URLSessionConfiguration.default
-        config.timeoutIntervalForRequest = 30
-        self.session = URLSession(configuration: config)
-    }
-
-    // MARK: - VisionService Protocol Implementation
-
-    func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
-        guard apiKey != "INSERT_KEY_HERE" else {
-            throw VisionServiceError.apiKeyMissing
-        }
-
-        // Connect to WebSocket
-        try await connectWebSocket()
-
-        var detectedIngredients: [String: Ingredient] = [:]
-
-        // Process frames from stream
-        for await pixelBuffer in stream {
-            do {
-                let frameIngredients = try await processFrame(pixelBuffer)
-
-                // Merge results (keep highest confidence for each ingredient)
-                for ingredient in frameIngredients {
-                    if let existing = detectedIngredients[ingredient.name] {
-                        if ingredient.confidence > existing.confidence {
-                            detectedIngredients[ingredient.name] = ingredient
-                        }
-                    } else {
-                        detectedIngredients[ingredient.name] = ingredient
-                    }
-                }
-
-                // Limit to max ingredients
-                if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
-                    break
-                }
-            } catch {
-                print("Error processing frame: \(error)")
-                continue
-            }
-        }
-
-        disconnectWebSocket()
-
-        return Array(detectedIngredients.values)
-            .filter { $0.confidence >= AppConfig.minConfidenceThreshold }
-            .sorted { $0.confidence > $1.confidence }
-    }
-
-    func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
-        guard apiKey != "INSERT_KEY_HERE" else {
-            throw VisionServiceError.apiKeyMissing
-        }
-
-        // For single frame, use REST API instead of WebSocket
-        return try await detectIngredientsViaREST(pixelBuffer)
-    }
-
-    func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
-        guard apiKey != "INSERT_KEY_HERE" else {
-            throw VisionServiceError.apiKeyMissing
-        }
-
-        // Connect to WebSocket for real-time monitoring
-        try await connectWebSocket()
-
-        var latestProgress = CookingProgress(isComplete: false, confidence: 0.0, feedback: "Analyzing...")
-
-        // Monitor frames for cooking completion
-        for await pixelBuffer in stream {
-            do {
-                let progress = try await analyzeCookingFrame(pixelBuffer, step: step)
-                latestProgress = progress
-
-                if progress.isComplete && progress.confidence > 0.8 {
-                    disconnectWebSocket()
-                    return progress
-                }
-            } catch {
-                print("Error analyzing cooking frame: \(error)")
-                continue
-            }
-        }
-
-        disconnectWebSocket()
-        return latestProgress
-    }
-
-    // MARK: - Private Helper Methods
-
-    private func connectWebSocket() async throws {
-        var request = URLRequest(url: webSocketURL)
-        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
-        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
-
-        webSocketTask = session.webSocketTask(with: request)
-        webSocketTask?.resume()
-
-        // Wait for connection
-        try await Task.sleep(for: .milliseconds(500))
-    }
-
-    private func disconnectWebSocket() {
-        webSocketTask?.cancel(with: .goingAway, reason: nil)
-        webSocketTask = nil
-    }
-
-    private func processFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
-        // Convert pixel buffer to JPEG data
-        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
-            throw VisionServiceError.invalidResponse
-        }
-
-        // Create WebSocket message
-        let message = OvershootRequest(
-            type: "detect_ingredients",
-            image: imageData.base64EncodedString(),
-            timestamp: Date().timeIntervalSince1970
-        )
-
-        // Send frame via WebSocket
-        let messageData = try JSONEncoder().encode(message)
-        let messageString = String(data: messageData, encoding: .utf8)!
-
-        try await webSocketTask?.send(.string(messageString))
-
-        // Receive response
-        guard let response = try await receiveWebSocketMessage() else {
-            return []
-        }
-
-        return parseIngredients(from: response)
-    }
-
-    private func analyzeCookingFrame(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
-        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
-            throw VisionServiceError.invalidResponse
-        }
-
-        let message = OvershootRequest(
-            type: "analyze_cooking",
-            image: imageData.base64EncodedString(),
-            timestamp: Date().timeIntervalSince1970,
-            context: step
-        )
-
-        let messageData = try JSONEncoder().encode(message)
-        let messageString = String(data: messageData, encoding: .utf8)!
-
-        try await webSocketTask?.send(.string(messageString))
-
-        guard let response = try await receiveWebSocketMessage() else {
-            return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No response")
-        }
-
-        return parseCookingProgress(from: response)
-    }
-
-    private func receiveWebSocketMessage() async throws -> OvershootResponse? {
-        guard let message = try await webSocketTask?.receive() else {
-            return nil
-        }
-
-        switch message {
-        case .string(let text):
-            guard let data = text.data(using: .utf8) else { return nil }
-            return try? JSONDecoder().decode(OvershootResponse.self, from: data)
-        case .data(let data):
-            return try? JSONDecoder().decode(OvershootResponse.self, from: data)
-        @unknown default:
-            return nil
-        }
-    }
-
-    private func detectIngredientsViaREST(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
-        // Fallback REST API implementation
-        // This would be used for single-frame detection
-
-        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
-            throw VisionServiceError.invalidResponse
-        }
-
-        var request = URLRequest(url: URL(string: "https://api.overshoot.ai/v1/detect")!)
-        request.httpMethod = "POST"
-        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
-        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
-
-        let requestBody = OvershootRequest(
-            type: "detect_ingredients",
-            image: imageData.base64EncodedString(),
-            timestamp: Date().timeIntervalSince1970
-        )
-
-        request.httpBody = try JSONEncoder().encode(requestBody)
-
-        let (data, _) = try await session.data(for: request)
-        let response = try JSONDecoder().decode(OvershootResponse.self, from: data)
-
-        return parseIngredients(from: response)
-    }
-
-    private func parseIngredients(from response: OvershootResponse) -> [Ingredient] {
-        guard let detections = response.detections else { return [] }
-
-        return detections.map { detection in
-            Ingredient(
-                name: detection.label,
-                estimatedQuantity: detection.quantity ?? "Unknown",
-                confidence: detection.confidence
-            )
-        }
-    }
-
-    private func parseCookingProgress(from response: OvershootResponse) -> CookingProgress {
-        CookingProgress(
-            isComplete: response.isComplete ?? false,
-            confidence: response.confidence ?? 0.0,
-            feedback: response.feedback ?? "Processing..."
-        )
-    }
-
-    private func pixelBufferToJPEG(_ pixelBuffer: CVPixelBuffer) -> Data? {
-        let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
-        let context = CIContext()
-
-        guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
-            return nil
-        }
-
-        let uiImage = UIImage(cgImage: cgImage)
-        return uiImage.jpegData(compressionQuality: 0.8)
-    }
-}
-
-// MARK: - Overshoot API Models
-
-private struct OvershootRequest: Codable {
-    let type: String
-    let image: String
-    let timestamp: TimeInterval
-    var context: String?
-}
-
-private struct OvershootResponse: Codable {
-    let detections: [Detection]?
-    let isComplete: Bool?
-    let confidence: Double?
-    let feedback: String?
-
-    struct Detection: Codable {
-        let label: String
-        let confidence: Double
-        let quantity: String?
-        let boundingBox: BoundingBox?
-    }
-
-    struct BoundingBox: Codable {
-        let x: Double
-        let y: Double
-        let width: Double
-        let height: Double
-    }
-}