AR version pre-test

2026-02-14 23:10:53 -06:00
parent a46264c1f5
commit f7f14b2c5d
2 changed files with 220 additions and 292 deletions

ARVisionService.swift

@@ -0,0 +1,220 @@
//
// ARVisionService.swift
// SousChefAI
//
// AR-based vision service using RealityKit and ARKit
// Provides real-time plane detection and raycasting capabilities
//
import Foundation
import SwiftUI
import RealityKit
import ARKit
@preconcurrency import CoreVideo
/// AR-based implementation for vision and spatial scanning
final class ARVisionService: VisionService, @unchecked Sendable {
// MARK: - VisionService Protocol Implementation
func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
// Mock implementation - in a real app, this would use ML models
// to detect ingredients from AR camera frames
var detectedIngredients: [Ingredient] = []
var frameCount = 0
for await pixelBuffer in stream {
frameCount += 1
// Process every 30th frame to reduce processing load
if frameCount % 30 == 0 {
let ingredients = try await processARFrame(pixelBuffer)
// Merge results
for ingredient in ingredients {
if !detectedIngredients.contains(where: { $0.name == ingredient.name }) {
detectedIngredients.append(ingredient)
}
}
// Stop after collecting enough ingredients
if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
break
}
}
}
return detectedIngredients
.filter { $0.confidence >= AppConfig.minConfidenceThreshold }
.sorted { $0.confidence > $1.confidence }
}
func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
return try await processARFrame(pixelBuffer)
}
func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
// Mock implementation for cooking progress monitoring
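// The stream and step parameters are intentionally unused in this stub; a real
// implementation would sample frames and evaluate them against the current step.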
return CookingProgress(
isComplete: false,
confidence: 0.5,
feedback: "Monitoring cooking progress..."
)
}
// MARK: - Private Helper Methods
private func processARFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
// Mock ingredient detection
// In a real implementation, this would use Vision framework or ML models
// to detect objects in the AR camera feed
// For now, return empty array - actual detection would happen here
return []
}
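// MARK: - Illustrative Sketch (not part of the original implementation)
// A minimal sketch of how processARFrame might perform real detection using the
// built-in Vision classifier instead of returning an empty array. Assumes an
// `import Vision` at the top of the file; the helper name and the naive
// label-to-ingredient mapping are hypothetical.
private func classifyFrameSketch(_ pixelBuffer: CVPixelBuffer) throws -> [Ingredient] {
    let request = VNClassifyImageRequest()
    let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
    try handler.perform([request])
    let observations = request.results ?? []
    // Keep reasonably confident classifications and map them directly to ingredients.
    return observations
        .filter { $0.confidence >= 0.3 }
        .prefix(5)
        .map { Ingredient(name: $0.identifier,
                          estimatedQuantity: "Unknown",
                          confidence: Double($0.confidence)) }
}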
}
/// SwiftUI wrapper for ARView with plane detection and raycasting
struct ARViewContainer: UIViewRepresentable {
@Binding var detectedPlanes: Int
@Binding var lastRaycastResult: String
func makeUIView(context: Context) -> ARView {
let arView = ARView(frame: .zero)
// Configure AR session
let configuration = ARWorldTrackingConfiguration()
// Enable plane detection for horizontal and vertical surfaces
configuration.planeDetection = [.horizontal, .vertical]
// Enable scene reconstruction for better spatial understanding
if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
configuration.sceneReconstruction = .mesh
}
// Enable debug options to visualize the reconstructed scene mesh and the world origin
arView.debugOptions = [.showSceneUnderstanding, .showWorldOrigin]
// Set the coordinator as the session delegate
arView.session.delegate = context.coordinator
// Run the AR session
arView.session.run(configuration)
// Add tap gesture for raycasting
let tapGesture = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap(_:)))
arView.addGestureRecognizer(tapGesture)
context.coordinator.arView = arView
return arView
}
func updateUIView(_ uiView: ARView, context: Context) {
// Update UI if needed
}
func makeCoordinator() -> Coordinator {
Coordinator(detectedPlanes: $detectedPlanes, lastRaycastResult: $lastRaycastResult)
}
// MARK: - Coordinator
class Coordinator: NSObject, ARSessionDelegate {
@Binding var detectedPlanes: Int
@Binding var lastRaycastResult: String
weak var arView: ARView?
private var detectedPlaneAnchors: Set<UUID> = []
init(detectedPlanes: Binding<Int>, lastRaycastResult: Binding<String>) {
_detectedPlanes = detectedPlanes
_lastRaycastResult = lastRaycastResult
}
// MARK: - ARSessionDelegate Methods
func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
for anchor in anchors {
if let planeAnchor = anchor as? ARPlaneAnchor {
detectedPlaneAnchors.insert(planeAnchor.identifier)
DispatchQueue.main.async {
self.detectedPlanes = self.detectedPlaneAnchors.count
}
}
}
}
func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
// Plane anchors are refined over time as ARKit improves its understanding; no action needed here
}
func session(_ session: ARSession, didRemove anchors: [ARAnchor]) {
for anchor in anchors {
if let planeAnchor = anchor as? ARPlaneAnchor {
detectedPlaneAnchors.remove(planeAnchor.identifier)
DispatchQueue.main.async {
self.detectedPlanes = self.detectedPlaneAnchors.count
}
}
}
}
func session(_ session: ARSession, didFailWithError error: Error) {
print("AR Session failed: \(error.localizedDescription)")
}
// MARK: - Raycasting
/// Performs a raycast from the given screen point to detect planes
func performRaycast(from point: CGPoint, in view: ARView) -> ARRaycastResult? {
// Create raycast query targeting estimated planes
guard let query = view.makeRaycastQuery(
from: point,
allowing: .estimatedPlane,
alignment: .any
) else {
return nil
}
// Perform the raycast
let results = view.session.raycast(query)
return results.first
}
@objc func handleTap(_ gesture: UITapGestureRecognizer) {
guard let arView = arView else { return }
let location = gesture.location(in: arView)
if let result = performRaycast(from: location, in: arView) {
let position = result.worldTransform.columns.3
let resultString = String(format: "Hit at: (%.2f, %.2f, %.2f)", position.x, position.y, position.z)
DispatchQueue.main.async {
self.lastRaycastResult = resultString
}
// Place a visual marker at the hit location
placeMarker(at: result.worldTransform, in: arView)
} else {
DispatchQueue.main.async {
self.lastRaycastResult = "No surface detected"
}
}
}
private func placeMarker(at transform: simd_float4x4, in arView: ARView) {
// Create a small sphere to visualize the raycast hit
let sphere = MeshResource.generateSphere(radius: 0.02)
let material = SimpleMaterial(color: .green, isMetallic: false)
let modelEntity = ModelEntity(mesh: sphere, materials: [material])
// Create an anchor at the hit position
let anchorEntity = AnchorEntity(world: transform)
anchorEntity.addChild(modelEntity)
arView.scene.addAnchor(anchorEntity)
}
}
}
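// MARK: - Usage Sketch (illustrative only, not part of the original commit)
// A minimal example of how ARViewContainer might be hosted from SwiftUI. The
// `ARScanScreen` view name and the overlay layout are hypothetical.
struct ARScanScreen: View {
    @State private var detectedPlanes = 0
    @State private var lastRaycastResult = "Tap a surface to raycast"

    var body: some View {
        ZStack(alignment: .bottom) {
            ARViewContainer(detectedPlanes: $detectedPlanes,
                            lastRaycastResult: $lastRaycastResult)
                .ignoresSafeArea()
            VStack(spacing: 4) {
                Text("Planes detected: \(detectedPlanes)")
                Text(lastRaycastResult)
            }
            .padding()
            .background(.ultraThinMaterial, in: RoundedRectangle(cornerRadius: 12))
            .padding(.bottom, 24)
        }
    }
}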

OvershootVisionService.swift

@@ -1,292 +0,0 @@
//
// OvershootVisionService.swift
// SousChefAI
//
// Concrete implementation of VisionService using Overshoot API
// Provides low-latency real-time video inference for ingredient detection
//
import Foundation
@preconcurrency import CoreVideo
import UIKit
import CoreImage
/// Overshoot API implementation for vision-based ingredient detection
final class OvershootVisionService: VisionService, @unchecked Sendable {
private let apiKey: String
private let webSocketURL: URL
private var webSocketTask: URLSessionWebSocketTask?
private let session: URLSession
nonisolated init(apiKey: String = AppConfig.overshootAPIKey,
webSocketURL: String = AppConfig.overshootWebSocketURL) {
self.apiKey = apiKey
guard let url = URL(string: webSocketURL) else {
fatalError("Invalid WebSocket URL: \(webSocketURL)")
}
self.webSocketURL = url
let config = URLSessionConfiguration.default
config.timeoutIntervalForRequest = 30
self.session = URLSession(configuration: config)
}
// MARK: - VisionService Protocol Implementation
func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
guard apiKey != "INSERT_KEY_HERE" else {
throw VisionServiceError.apiKeyMissing
}
// Connect to WebSocket
try await connectWebSocket()
var detectedIngredients: [String: Ingredient] = [:]
// Process frames from stream
for await pixelBuffer in stream {
do {
let frameIngredients = try await processFrame(pixelBuffer)
// Merge results (keep highest confidence for each ingredient)
for ingredient in frameIngredients {
if let existing = detectedIngredients[ingredient.name] {
if ingredient.confidence > existing.confidence {
detectedIngredients[ingredient.name] = ingredient
}
} else {
detectedIngredients[ingredient.name] = ingredient
}
}
// Limit to max ingredients
if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
break
}
} catch {
print("Error processing frame: \(error)")
continue
}
}
disconnectWebSocket()
return Array(detectedIngredients.values)
.filter { $0.confidence >= AppConfig.minConfidenceThreshold }
.sorted { $0.confidence > $1.confidence }
}
func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
guard apiKey != "INSERT_KEY_HERE" else {
throw VisionServiceError.apiKeyMissing
}
// For single frame, use REST API instead of WebSocket
return try await detectIngredientsViaREST(pixelBuffer)
}
func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
guard apiKey != "INSERT_KEY_HERE" else {
throw VisionServiceError.apiKeyMissing
}
// Connect to WebSocket for real-time monitoring
try await connectWebSocket()
var latestProgress = CookingProgress(isComplete: false, confidence: 0.0, feedback: "Analyzing...")
// Monitor frames for cooking completion
for await pixelBuffer in stream {
do {
let progress = try await analyzeCookingFrame(pixelBuffer, step: step)
latestProgress = progress
if progress.isComplete && progress.confidence > 0.8 {
disconnectWebSocket()
return progress
}
} catch {
print("Error analyzing cooking frame: \(error)")
continue
}
}
disconnectWebSocket()
return latestProgress
}
// MARK: - Private Helper Methods
private func connectWebSocket() async throws {
var request = URLRequest(url: webSocketURL)
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
request.setValue("application/json", forHTTPHeaderField: "Content-Type")
webSocketTask = session.webSocketTask(with: request)
webSocketTask?.resume()
// Brief fixed delay to let the connection establish (see the ping-based verification sketch below)
try await Task.sleep(for: .milliseconds(500))
}
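// Sketch (not part of the original implementation): instead of a fixed delay,
// connection readiness could be verified with a ping/pong round trip before
// sending frames. The helper name is hypothetical.
private func verifyConnectionSketch() async throws {
    guard let task = webSocketTask else { throw VisionServiceError.invalidResponse }
    try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<Void, Error>) in
        task.sendPing { error in
            if let error {
                continuation.resume(throwing: error)
            } else {
                continuation.resume()
            }
        }
    }
}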
private func disconnectWebSocket() {
webSocketTask?.cancel(with: .goingAway, reason: nil)
webSocketTask = nil
}
private func processFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
// Convert pixel buffer to JPEG data
guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
throw VisionServiceError.invalidResponse
}
// Create WebSocket message
let message = OvershootRequest(
type: "detect_ingredients",
image: imageData.base64EncodedString(),
timestamp: Date().timeIntervalSince1970
)
// Send frame via WebSocket
let messageData = try JSONEncoder().encode(message)
let messageString = String(data: messageData, encoding: .utf8)!
try await webSocketTask?.send(.string(messageString))
// Receive response
guard let response = try await receiveWebSocketMessage() else {
return []
}
return parseIngredients(from: response)
}
private func analyzeCookingFrame(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
throw VisionServiceError.invalidResponse
}
let message = OvershootRequest(
type: "analyze_cooking",
image: imageData.base64EncodedString(),
timestamp: Date().timeIntervalSince1970,
context: step
)
let messageData = try JSONEncoder().encode(message)
let messageString = String(data: messageData, encoding: .utf8)!
try await webSocketTask?.send(.string(messageString))
guard let response = try await receiveWebSocketMessage() else {
return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No response")
}
return parseCookingProgress(from: response)
}
private func receiveWebSocketMessage() async throws -> OvershootResponse? {
guard let message = try await webSocketTask?.receive() else {
return nil
}
switch message {
case .string(let text):
guard let data = text.data(using: .utf8) else { return nil }
return try? JSONDecoder().decode(OvershootResponse.self, from: data)
case .data(let data):
return try? JSONDecoder().decode(OvershootResponse.self, from: data)
@unknown default:
return nil
}
}
private func detectIngredientsViaREST(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
// Fallback REST API implementation
// This would be used for single-frame detection
guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
throw VisionServiceError.invalidResponse
}
var request = URLRequest(url: URL(string: "https://api.overshoot.ai/v1/detect")!)
request.httpMethod = "POST"
request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
request.setValue("application/json", forHTTPHeaderField: "Content-Type")
let requestBody = OvershootRequest(
type: "detect_ingredients",
image: imageData.base64EncodedString(),
timestamp: Date().timeIntervalSince1970
)
request.httpBody = try JSONEncoder().encode(requestBody)
let (data, _) = try await session.data(for: request)
let response = try JSONDecoder().decode(OvershootResponse.self, from: data)
return parseIngredients(from: response)
}
private func parseIngredients(from response: OvershootResponse) -> [Ingredient] {
guard let detections = response.detections else { return [] }
return detections.map { detection in
Ingredient(
name: detection.label,
estimatedQuantity: detection.quantity ?? "Unknown",
confidence: detection.confidence
)
}
}
private func parseCookingProgress(from response: OvershootResponse) -> CookingProgress {
CookingProgress(
isComplete: response.isComplete ?? false,
confidence: response.confidence ?? 0.0,
feedback: response.feedback ?? "Processing..."
)
}
private func pixelBufferToJPEG(_ pixelBuffer: CVPixelBuffer) -> Data? {
let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
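// Note: creating a new CIContext for every frame is relatively expensive; a production
// implementation would likely cache a single context as a stored property.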
let context = CIContext()
guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
return nil
}
let uiImage = UIImage(cgImage: cgImage)
return uiImage.jpegData(compressionQuality: 0.8)
}
}
// MARK: - Overshoot API Models
private struct OvershootRequest: Codable {
let type: String
let image: String
let timestamp: TimeInterval
var context: String?
}
private struct OvershootResponse: Codable {
let detections: [Detection]?
let isComplete: Bool?
let confidence: Double?
let feedback: String?
struct Detection: Codable {
let label: String
let confidence: Double
let quantity: String?
let boundingBox: BoundingBox?
}
struct BoundingBox: Codable {
let x: Double
let y: Double
let width: Double
let height: Double
}
}
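// Illustrative example (hypothetical payload, not taken from Overshoot documentation):
// a response shaped like the following would decode into OvershootResponse above.
//
// {
//   "detections": [
//     { "label": "tomato", "confidence": 0.92, "quantity": "2 medium",
//       "boundingBox": { "x": 0.12, "y": 0.30, "width": 0.25, "height": 0.22 } }
//   ]
// }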