AR version pre-test
This commit is contained in:
220
SousChefAI/Services/ARVisionService.swift
Normal file
220
SousChefAI/Services/ARVisionService.swift
Normal file
@@ -0,0 +1,220 @@
|
||||
//
|
||||
// ARVisionService.swift
|
||||
// SousChefAI
|
||||
//
|
||||
// AR-based vision service using RealityKit and ARKit
|
||||
// Provides real-time plane detection and raycasting capabilities
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftUI
|
||||
import RealityKit
|
||||
import ARKit
|
||||
@preconcurrency import CoreVideo
|
||||
|
||||
/// AR-based implementation for vision and spatial scanning
|
||||
/// AR-based implementation for vision and spatial scanning.
/// Currently a mock: frames are sampled but no ML inference runs yet.
final class ARVisionService: VisionService, @unchecked Sendable {

    // MARK: - VisionService Protocol Implementation

    /// Samples frames from the AR camera stream and accumulates distinct
    /// ingredient detections (unique by name) until the per-scan cap is hit.
    /// Mock implementation — a real app would run ML models on the frames.
    func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
        var found: [Ingredient] = []
        var framesSeen = 0

        for await buffer in stream {
            framesSeen += 1

            // Only analyze every 30th frame to keep the processing load low.
            guard framesSeen.isMultiple(of: 30) else { continue }

            // Merge this frame's detections, skipping names already seen.
            for candidate in try await processARFrame(buffer)
            where !found.contains(where: { $0.name == candidate.name }) {
                found.append(candidate)
            }

            // Enough ingredients collected — stop consuming the stream.
            if found.count >= AppConfig.maxIngredientsPerScan {
                break
            }
        }

        return found
            .filter { $0.confidence >= AppConfig.minConfidenceThreshold }
            .sorted { $0.confidence > $1.confidence }
    }

    /// Single-frame variant; delegates straight to the frame processor.
    func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        try await processARFrame(pixelBuffer)
    }

    /// Mock cooking-progress analysis: always reports an in-progress state
    /// regardless of the stream contents or the current step.
    func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
        CookingProgress(
            isComplete: false,
            confidence: 0.5,
            feedback: "Monitoring cooking progress..."
        )
    }

    // MARK: - Private Helper Methods

    /// Placeholder for per-frame inference. A real implementation would use
    /// the Vision framework or ML models on the AR camera feed; for now it
    /// detects nothing.
    private func processARFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        return []
    }
}
|
||||
|
||||
/// SwiftUI wrapper for ARView with plane detection and raycasting
|
||||
/// SwiftUI wrapper for ARView with plane detection and raycasting.
struct ARViewContainer: UIViewRepresentable {
    /// Number of distinct plane anchors currently tracked by the session.
    @Binding var detectedPlanes: Int
    /// Human-readable description of the most recent raycast hit (or miss).
    @Binding var lastRaycastResult: String

    func makeUIView(context: Context) -> ARView {
        let arView = ARView(frame: .zero)

        // Configure AR session
        let configuration = ARWorldTrackingConfiguration()

        // Enable plane detection for horizontal and vertical surfaces
        configuration.planeDetection = [.horizontal, .vertical]

        // Enable scene reconstruction for better spatial understanding
        // (only available on devices that support mesh reconstruction).
        if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
            configuration.sceneReconstruction = .mesh
        }

        // Enable debug options to visualize detected planes
        arView.debugOptions = [.showSceneUnderstanding, .showWorldOrigin]

        // The coordinator receives plane add/remove callbacks, so it must be
        // the delegate before the session starts running.
        arView.session.delegate = context.coordinator

        // Run the AR session
        arView.session.run(configuration)

        // Add tap gesture for raycasting
        let tapGesture = UITapGestureRecognizer(target: context.coordinator, action: #selector(Coordinator.handleTap(_:)))
        arView.addGestureRecognizer(tapGesture)

        context.coordinator.arView = arView

        return arView
    }

    func updateUIView(_ uiView: ARView, context: Context) {
        // No state flows from SwiftUI into the ARView; the coordinator
        // writes back through the bindings instead.
    }

    /// FIX: pause the AR session when SwiftUI removes the view. Without
    /// this, the camera and world tracking keep running after the view is
    /// dismissed (battery drain and camera-in-use indicator stays on).
    static func dismantleUIView(_ uiView: ARView, coordinator: Coordinator) {
        uiView.session.pause()
        coordinator.arView = nil
    }

    func makeCoordinator() -> Coordinator {
        Coordinator(detectedPlanes: $detectedPlanes, lastRaycastResult: $lastRaycastResult)
    }

    // MARK: - Coordinator

    /// Receives ARSession callbacks and tap gestures, and bridges results
    /// back to SwiftUI through the container's bindings.
    class Coordinator: NSObject, ARSessionDelegate {
        @Binding var detectedPlanes: Int
        @Binding var lastRaycastResult: String
        weak var arView: ARView?
        // Identifiers of plane anchors currently known to the session; the
        // set's size keeps `detectedPlanes` an exact live count.
        private var detectedPlaneAnchors: Set<UUID> = []

        init(detectedPlanes: Binding<Int>, lastRaycastResult: Binding<String>) {
            _detectedPlanes = detectedPlanes
            _lastRaycastResult = lastRaycastResult
        }

        // MARK: - ARSessionDelegate Methods

        func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
            for anchor in anchors {
                if let planeAnchor = anchor as? ARPlaneAnchor {
                    detectedPlaneAnchors.insert(planeAnchor.identifier)
                    // Bindings must be mutated on the main thread.
                    DispatchQueue.main.async {
                        self.detectedPlanes = self.detectedPlaneAnchors.count
                    }
                }
            }
        }

        func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
            // Planes are being updated as AR refines understanding; the
            // count is unchanged, so there is nothing to publish.
        }

        func session(_ session: ARSession, didRemove anchors: [ARAnchor]) {
            for anchor in anchors {
                if let planeAnchor = anchor as? ARPlaneAnchor {
                    detectedPlaneAnchors.remove(planeAnchor.identifier)
                    DispatchQueue.main.async {
                        self.detectedPlanes = self.detectedPlaneAnchors.count
                    }
                }
            }
        }

        func session(_ session: ARSession, didFailWithError error: Error) {
            // NOTE(review): consider surfacing this through a binding so the
            // UI can react; for now the failure is only logged.
            print("AR Session failed: \(error.localizedDescription)")
        }

        // MARK: - Raycasting

        /// Performs a raycast from the given screen point against estimated
        /// planes of any alignment; returns the nearest hit, if any.
        func performRaycast(from point: CGPoint, in view: ARView) -> ARRaycastResult? {
            guard let query = view.makeRaycastQuery(
                from: point,
                allowing: .estimatedPlane,
                alignment: .any
            ) else {
                return nil
            }

            // Results are ordered nearest-first; take the closest hit.
            let results = view.session.raycast(query)
            return results.first
        }

        /// Tap handler: raycasts at the tap location, reports the world-space
        /// hit position through `lastRaycastResult`, and drops a marker.
        @objc func handleTap(_ gesture: UITapGestureRecognizer) {
            guard let arView = arView else { return }

            let location = gesture.location(in: arView)

            if let result = performRaycast(from: location, in: arView) {
                // Column 3 of the world transform holds the translation.
                let position = result.worldTransform.columns.3
                let resultString = String(format: "Hit at: (%.2f, %.2f, %.2f)", position.x, position.y, position.z)

                DispatchQueue.main.async {
                    self.lastRaycastResult = resultString
                }

                // Place a visual marker at the hit location
                placeMarker(at: result.worldTransform, in: arView)
            } else {
                DispatchQueue.main.async {
                    self.lastRaycastResult = "No surface detected"
                }
            }
        }

        /// Anchors a small green sphere at the given world transform so the
        /// user can see where the raycast landed.
        private func placeMarker(at transform: simd_float4x4, in arView: ARView) {
            let sphere = MeshResource.generateSphere(radius: 0.02)
            let material = SimpleMaterial(color: .green, isMetallic: false)
            let modelEntity = ModelEntity(mesh: sphere, materials: [material])

            // Create an anchor at the hit position
            let anchorEntity = AnchorEntity(world: transform)
            anchorEntity.addChild(modelEntity)

            arView.scene.addAnchor(anchorEntity)
        }
    }
}
|
||||
@@ -1,292 +0,0 @@
|
||||
//
|
||||
// OvershootVisionService.swift
|
||||
// SousChefAI
|
||||
//
|
||||
// Concrete implementation of VisionService using Overshoot API
|
||||
// Provides low-latency real-time video inference for ingredient detection
|
||||
//
|
||||
|
||||
import Foundation
|
||||
@preconcurrency import CoreVideo
|
||||
import UIKit
|
||||
|
||||
/// Overshoot API implementation for vision-based ingredient detection
|
||||
/// Overshoot API implementation for vision-based ingredient detection.
/// Streams camera frames over a WebSocket for low-latency inference and
/// falls back to a REST endpoint for single-frame requests.
final class OvershootVisionService: VisionService, @unchecked Sendable {

    private let apiKey: String
    private let webSocketURL: URL
    private var webSocketTask: URLSessionWebSocketTask?
    private let session: URLSession

    /// - Parameters:
    ///   - apiKey: Overshoot bearer token; leaving the placeholder value
    ///     makes every public method throw `apiKeyMissing`.
    ///   - webSocketURL: must parse as a URL — an invalid value is a
    ///     build-time configuration bug, so we crash early.
    nonisolated init(apiKey: String = AppConfig.overshootAPIKey,
                     webSocketURL: String = AppConfig.overshootWebSocketURL) {
        self.apiKey = apiKey
        guard let url = URL(string: webSocketURL) else {
            fatalError("Invalid WebSocket URL: \(webSocketURL)")
        }
        self.webSocketURL = url

        let config = URLSessionConfiguration.default
        config.timeoutIntervalForRequest = 30
        self.session = URLSession(configuration: config)
    }

    // MARK: - VisionService Protocol Implementation

    /// Streams frames to the API, merging per-frame detections and keeping
    /// the highest-confidence entry per ingredient name.
    func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
        try ensureAPIKeyConfigured()

        try await connectWebSocket()
        // FIX: guarantee the socket is torn down on every exit path
        // (normal completion, early break, or a thrown error).
        defer { disconnectWebSocket() }

        var detectedIngredients: [String: Ingredient] = [:]

        for await pixelBuffer in stream {
            do {
                let frameIngredients = try await processFrame(pixelBuffer)

                // Merge results (keep highest confidence for each ingredient)
                for ingredient in frameIngredients {
                    if let existing = detectedIngredients[ingredient.name],
                       existing.confidence >= ingredient.confidence {
                        continue
                    }
                    detectedIngredients[ingredient.name] = ingredient
                }

                // Limit to max ingredients
                if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
                    break
                }
            } catch {
                // A single bad frame shouldn't abort the whole scan.
                print("Error processing frame: \(error)")
                continue
            }
        }

        return Array(detectedIngredients.values)
            .filter { $0.confidence >= AppConfig.minConfidenceThreshold }
            .sorted { $0.confidence > $1.confidence }
    }

    /// Single-frame detection via the REST endpoint (no WebSocket needed).
    func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        try ensureAPIKeyConfigured()
        return try await detectIngredientsViaREST(pixelBuffer)
    }

    /// Monitors the stream until the API reports the step complete with
    /// confidence above 0.8, or the stream ends (returning the latest reading).
    func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
        try ensureAPIKeyConfigured()

        try await connectWebSocket()
        // FIX: one teardown point instead of duplicated disconnect calls.
        defer { disconnectWebSocket() }

        var latestProgress = CookingProgress(isComplete: false, confidence: 0.0, feedback: "Analyzing...")

        for await pixelBuffer in stream {
            do {
                let progress = try await analyzeCookingFrame(pixelBuffer, step: step)
                latestProgress = progress

                // Stop early once the model is confident the step is done.
                if progress.isComplete && progress.confidence > 0.8 {
                    return progress
                }
            } catch {
                print("Error analyzing cooking frame: \(error)")
                continue
            }
        }

        return latestProgress
    }

    // MARK: - Private Helper Methods

    /// Throws if the API key is still the unconfigured placeholder.
    private func ensureAPIKeyConfigured() throws {
        guard apiKey != "INSERT_KEY_HERE" else {
            throw VisionServiceError.apiKeyMissing
        }
    }

    private func connectWebSocket() async throws {
        var request = URLRequest(url: webSocketURL)
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")

        webSocketTask = session.webSocketTask(with: request)
        webSocketTask?.resume()

        // NOTE(review): fixed delay stands in for a real handshake; a ping
        // or a server hello would be more reliable — TODO confirm against
        // the Overshoot protocol before changing.
        try await Task.sleep(for: .milliseconds(500))
    }

    private func disconnectWebSocket() {
        webSocketTask?.cancel(with: .goingAway, reason: nil)
        webSocketTask = nil
    }

    /// Encodes `message` as JSON, sends it over the socket, and waits for
    /// the next incoming message (assumes a strict request/response flow).
    private func sendAndReceive(_ message: OvershootRequest) async throws -> OvershootResponse? {
        let messageData = try JSONEncoder().encode(message)
        // FIX: JSONEncoder output is UTF-8 in practice, but fail safely
        // instead of force-unwrapping.
        guard let messageString = String(data: messageData, encoding: .utf8) else {
            throw VisionServiceError.invalidResponse
        }

        try await webSocketTask?.send(.string(messageString))
        return try await receiveWebSocketMessage()
    }

    /// Sends one frame for ingredient detection and parses the reply.
    private func processFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        // Convert pixel buffer to JPEG data
        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
            throw VisionServiceError.invalidResponse
        }

        let message = OvershootRequest(
            type: "detect_ingredients",
            image: imageData.base64EncodedString(),
            timestamp: Date().timeIntervalSince1970
        )

        guard let response = try await sendAndReceive(message) else {
            return []
        }

        return parseIngredients(from: response)
    }

    /// Sends one frame for cooking-progress analysis of the given step.
    private func analyzeCookingFrame(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
            throw VisionServiceError.invalidResponse
        }

        let message = OvershootRequest(
            type: "analyze_cooking",
            image: imageData.base64EncodedString(),
            timestamp: Date().timeIntervalSince1970,
            context: step
        )

        guard let response = try await sendAndReceive(message) else {
            return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No response")
        }

        return parseCookingProgress(from: response)
    }

    /// Receives one WebSocket message and decodes it as a response.
    /// NOTE(review): `try?` silently maps decode failures to nil; acceptable
    /// for best-effort streaming, but consider logging the failure.
    private func receiveWebSocketMessage() async throws -> OvershootResponse? {
        guard let message = try await webSocketTask?.receive() else {
            return nil
        }

        switch message {
        case .string(let text):
            guard let data = text.data(using: .utf8) else { return nil }
            return try? JSONDecoder().decode(OvershootResponse.self, from: data)
        case .data(let data):
            return try? JSONDecoder().decode(OvershootResponse.self, from: data)
        @unknown default:
            return nil
        }
    }

    /// Fallback REST API implementation used for single-frame detection.
    private func detectIngredientsViaREST(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
            throw VisionServiceError.invalidResponse
        }

        // Endpoint is a literal, so the force-unwrapped URL cannot fail.
        var request = URLRequest(url: URL(string: "https://api.overshoot.ai/v1/detect")!)
        request.httpMethod = "POST"
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")

        let requestBody = OvershootRequest(
            type: "detect_ingredients",
            image: imageData.base64EncodedString(),
            timestamp: Date().timeIntervalSince1970
        )

        request.httpBody = try JSONEncoder().encode(requestBody)

        let (data, _) = try await session.data(for: request)
        let response = try JSONDecoder().decode(OvershootResponse.self, from: data)

        return parseIngredients(from: response)
    }

    /// Maps API detections to domain `Ingredient` values.
    private func parseIngredients(from response: OvershootResponse) -> [Ingredient] {
        guard let detections = response.detections else { return [] }

        return detections.map { detection in
            Ingredient(
                name: detection.label,
                estimatedQuantity: detection.quantity ?? "Unknown",
                confidence: detection.confidence
            )
        }
    }

    /// Maps an API response to `CookingProgress`, defaulting missing fields.
    private func parseCookingProgress(from response: OvershootResponse) -> CookingProgress {
        CookingProgress(
            isComplete: response.isComplete ?? false,
            confidence: response.confidence ?? 0.0,
            feedback: response.feedback ?? "Processing..."
        )
    }

    /// Renders the pixel buffer to JPEG data (quality 0.8); returns nil if
    /// the frame cannot be converted.
    private func pixelBufferToJPEG(_ pixelBuffer: CVPixelBuffer) -> Data? {
        let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
        let context = CIContext()

        guard let cgImage = context.createCGImage(ciImage, from: ciImage.extent) else {
            return nil
        }

        let uiImage = UIImage(cgImage: cgImage)
        return uiImage.jpegData(compressionQuality: 0.8)
    }
}
|
||||
|
||||
// MARK: - Overshoot API Models
|
||||
|
||||
/// JSON payload sent to the Overshoot API (over WebSocket or REST).
private struct OvershootRequest: Codable {
    // Operation to perform — this file sends "detect_ingredients" and
    // "analyze_cooking".
    let type: String
    // Base64-encoded JPEG of the camera frame.
    let image: String
    // Client-side capture time, seconds since 1970.
    let timestamp: TimeInterval
    // Optional extra context (the current cooking step). Declared `var` so
    // the memberwise initializer defaults it to nil for detection requests.
    var context: String?
}
|
||||
|
||||
/// JSON payload received from the Overshoot API. All fields are optional
/// because different request types populate different subsets.
private struct OvershootResponse: Codable {
    // Per-object results for ingredient-detection requests.
    let detections: [Detection]?
    // Whether the monitored cooking step appears finished (progress requests).
    let isComplete: Bool?
    // Model confidence for `isComplete`; callers treat > 0.8 as decisive.
    let confidence: Double?
    // Human-readable status message shown to the user.
    let feedback: String?

    /// One detected object in the analyzed frame.
    struct Detection: Codable {
        let label: String
        let confidence: Double
        // Estimated quantity as free text — format set by the API; verify
        // against the Overshoot docs.
        let quantity: String?
        let boundingBox: BoundingBox?
    }

    /// Location of a detection in the frame. Units (pixels vs. normalized
    /// 0–1 coordinates) are not established by this file — confirm with the
    /// API documentation before using.
    struct BoundingBox: Codable {
        let x: Double
        let y: Double
        let width: Double
        let height: Double
    }
}
|
||||
Reference in New Issue
Block a user