AR version pre-test
This commit is contained in:
220
SousChefAI/Services/ARVisionService.swift
Normal file
220
SousChefAI/Services/ARVisionService.swift
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
//
|
||||||
|
// ARVisionService.swift
|
||||||
|
// SousChefAI
|
||||||
|
//
|
||||||
|
// AR-based vision service using RealityKit and ARKit
|
||||||
|
// Provides real-time plane detection and raycasting capabilities
|
||||||
|
//
|
||||||
|
|
||||||
|
import Foundation
|
||||||
|
import SwiftUI
|
||||||
|
import RealityKit
|
||||||
|
import ARKit
|
||||||
|
@preconcurrency import CoreVideo
|
||||||
|
|
||||||
|
/// AR-based implementation for vision and spatial scanning.
///
/// Mock implementation: real ML-based ingredient detection is not wired up
/// yet — `processARFrame` currently returns no results, so the public
/// methods exercise the frame-sampling / merging pipeline only.
final class ARVisionService: VisionService, @unchecked Sendable {

    // MARK: - Tuning Constants

    /// Only every Nth frame is analyzed, to reduce processing load.
    private static let frameSamplingInterval = 30

    // MARK: - VisionService Protocol Implementation

    /// Consumes camera frames from `stream`, analyzing every
    /// `frameSamplingInterval`-th frame and accumulating unique ingredients
    /// (by name, first occurrence wins) until
    /// `AppConfig.maxIngredientsPerScan` is reached or the stream ends.
    /// - Returns: Ingredients at/above `AppConfig.minConfidenceThreshold`,
    ///   sorted by descending confidence.
    func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
        var detectedIngredients: [Ingredient] = []
        // Track seen names in a Set so the duplicate check is O(1) instead
        // of scanning the result array on every insertion.
        var seenNames: Set<String> = []
        var frameCount = 0

        for await pixelBuffer in stream {
            frameCount += 1

            // Skip most frames to keep the pipeline responsive on-device.
            guard frameCount % Self.frameSamplingInterval == 0 else { continue }

            let ingredients = try await processARFrame(pixelBuffer)

            // Merge results, keeping the first occurrence of each name.
            for ingredient in ingredients where seenNames.insert(ingredient.name).inserted {
                detectedIngredients.append(ingredient)
            }

            // Stop after collecting enough ingredients.
            if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
                break
            }
        }

        return detectedIngredients
            .filter { $0.confidence >= AppConfig.minConfidenceThreshold }
            .sorted { $0.confidence > $1.confidence }
    }

    /// Runs detection on a single frame.
    func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        try await processARFrame(pixelBuffer)
    }

    /// Mock implementation for cooking progress monitoring; always reports
    /// "in progress" at 0.5 confidence without consuming the stream.
    func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
        CookingProgress(
            isComplete: false,
            confidence: 0.5,
            feedback: "Monitoring cooking progress..."
        )
    }

    // MARK: - Private Helper Methods

    /// Mock ingredient detection. A real implementation would run Vision
    /// framework / ML models against the AR camera feed; for now it
    /// returns an empty array.
    private func processARFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        return []
    }
}
|
||||||
|
|
||||||
|
/// SwiftUI wrapper for ARView with plane detection and raycasting.
/// Plane count and the latest raycast hit are surfaced via bindings.
struct ARViewContainer: UIViewRepresentable {
    @Binding var detectedPlanes: Int
    @Binding var lastRaycastResult: String

    func makeUIView(context: Context) -> ARView {
        let arView = ARView(frame: .zero)

        // World-tracking session detecting both surface orientations.
        let config = ARWorldTrackingConfiguration()
        config.planeDetection = [.horizontal, .vertical]

        // Opt into mesh scene reconstruction when the hardware supports it,
        // for better spatial understanding.
        if ARWorldTrackingConfiguration.supportsSceneReconstruction(.mesh) {
            config.sceneReconstruction = .mesh
        }

        // Debug visualization of detected geometry and the world origin.
        arView.debugOptions = [.showSceneUnderstanding, .showWorldOrigin]

        // The coordinator receives anchor add/update/remove callbacks.
        arView.session.delegate = context.coordinator
        arView.session.run(config)

        // Tap-to-raycast: forward taps to the coordinator.
        let tap = UITapGestureRecognizer(
            target: context.coordinator,
            action: #selector(Coordinator.handleTap(_:))
        )
        arView.addGestureRecognizer(tap)

        context.coordinator.arView = arView
        return arView
    }

    func updateUIView(_ uiView: ARView, context: Context) {
        // Nothing to push down; state flows out through the bindings.
    }

    func makeCoordinator() -> Coordinator {
        Coordinator(detectedPlanes: $detectedPlanes, lastRaycastResult: $lastRaycastResult)
    }

    // MARK: - Coordinator

    class Coordinator: NSObject, ARSessionDelegate {
        @Binding var detectedPlanes: Int
        @Binding var lastRaycastResult: String
        weak var arView: ARView?

        // Identifiers of plane anchors currently known to the session.
        private var detectedPlaneAnchors: Set<UUID> = []

        init(detectedPlanes: Binding<Int>, lastRaycastResult: Binding<String>) {
            _detectedPlanes = detectedPlanes
            _lastRaycastResult = lastRaycastResult
        }

        // MARK: - ARSessionDelegate Methods

        func session(_ session: ARSession, didAdd anchors: [ARAnchor]) {
            for case let planeAnchor as ARPlaneAnchor in anchors {
                detectedPlaneAnchors.insert(planeAnchor.identifier)
                // Bindings drive SwiftUI state; publish on the main queue.
                DispatchQueue.main.async {
                    self.detectedPlanes = self.detectedPlaneAnchors.count
                }
            }
        }

        func session(_ session: ARSession, didUpdate anchors: [ARAnchor]) {
            // Planes are being updated as AR refines understanding.
        }

        func session(_ session: ARSession, didRemove anchors: [ARAnchor]) {
            for case let planeAnchor as ARPlaneAnchor in anchors {
                detectedPlaneAnchors.remove(planeAnchor.identifier)
                DispatchQueue.main.async {
                    self.detectedPlanes = self.detectedPlaneAnchors.count
                }
            }
        }

        func session(_ session: ARSession, didFailWithError error: Error) {
            print("AR Session failed: \(error.localizedDescription)")
        }

        // MARK: - Raycasting

        /// Performs a raycast from screen point `point` against estimated
        /// planes of any alignment; returns the nearest hit, if any.
        func performRaycast(from point: CGPoint, in view: ARView) -> ARRaycastResult? {
            guard let query = view.makeRaycastQuery(
                from: point,
                allowing: .estimatedPlane,
                alignment: .any
            ) else {
                return nil
            }
            return view.session.raycast(query).first
        }

        /// Raycasts from the tap location; on a hit, publishes the world
        /// position and drops a visual marker, otherwise reports no surface.
        @objc func handleTap(_ gesture: UITapGestureRecognizer) {
            guard let arView else { return }

            let tapPoint = gesture.location(in: arView)

            guard let hit = performRaycast(from: tapPoint, in: arView) else {
                DispatchQueue.main.async {
                    self.lastRaycastResult = "No surface detected"
                }
                return
            }

            // Translation column of the hit transform = world position.
            let position = hit.worldTransform.columns.3
            let resultString = String(format: "Hit at: (%.2f, %.2f, %.2f)", position.x, position.y, position.z)

            DispatchQueue.main.async {
                self.lastRaycastResult = resultString
            }

            placeMarker(at: hit.worldTransform, in: arView)
        }

        /// Anchors a small green sphere at `transform` to visualize the hit.
        private func placeMarker(at transform: simd_float4x4, in arView: ARView) {
            let marker = ModelEntity(
                mesh: MeshResource.generateSphere(radius: 0.02),
                materials: [SimpleMaterial(color: .green, isMetallic: false)]
            )

            let anchorEntity = AnchorEntity(world: transform)
            anchorEntity.addChild(marker)
            arView.scene.addAnchor(anchorEntity)
        }
    }
}
|
||||||
@@ -1,292 +0,0 @@
|
|||||||
//
|
|
||||||
// OvershootVisionService.swift
|
|
||||||
// SousChefAI
|
|
||||||
//
|
|
||||||
// Concrete implementation of VisionService using Overshoot API
|
|
||||||
// Provides low-latency real-time video inference for ingredient detection
|
|
||||||
//
|
|
||||||
|
|
||||||
import Foundation
|
|
||||||
@preconcurrency import CoreVideo
|
|
||||||
import UIKit
|
|
||||||
|
|
||||||
/// Overshoot API implementation for vision-based ingredient detection.
/// Streams frames over a WebSocket for low-latency real-time inference,
/// with a REST fallback for single-frame detection.
final class OvershootVisionService: VisionService, @unchecked Sendable {

    /// Sentinel meaning the API key was never configured.
    private static let placeholderAPIKey = "INSERT_KEY_HERE"

    private let apiKey: String
    private let webSocketURL: URL
    private var webSocketTask: URLSessionWebSocketTask?
    private let session: URLSession
    // CIContext is expensive to create and documented thread-safe; build it
    // once instead of per frame in pixelBufferToJPEG.
    private let ciContext = CIContext()

    /// - Parameters:
    ///   - apiKey: Overshoot API key; defaults to the app-wide config value.
    ///   - webSocketURL: Streaming endpoint. An invalid URL string is a
    ///     programmer error and traps immediately.
    nonisolated init(apiKey: String = AppConfig.overshootAPIKey,
                     webSocketURL: String = AppConfig.overshootWebSocketURL) {
        self.apiKey = apiKey
        guard let url = URL(string: webSocketURL) else {
            fatalError("Invalid WebSocket URL: \(webSocketURL)")
        }
        self.webSocketURL = url

        let config = URLSessionConfiguration.default
        config.timeoutIntervalForRequest = 30
        self.session = URLSession(configuration: config)
    }

    // MARK: - VisionService Protocol Implementation

    /// Streams frames to the Overshoot WebSocket, merging per-frame results
    /// and keeping the highest-confidence entry for each ingredient name.
    /// Individual frame failures are logged and skipped so one bad frame
    /// does not abort the whole scan.
    /// - Returns: Ingredients at/above `AppConfig.minConfidenceThreshold`,
    ///   sorted by descending confidence.
    /// - Throws: `VisionServiceError.apiKeyMissing` if the key is unset.
    func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
        try requireAPIKey()

        // Connect to WebSocket
        try await connectWebSocket()

        var detectedIngredients: [String: Ingredient] = [:]

        // Process frames from stream
        for await pixelBuffer in stream {
            do {
                let frameIngredients = try await processFrame(pixelBuffer)

                // Merge results (keep highest confidence for each ingredient)
                for ingredient in frameIngredients {
                    if let existing = detectedIngredients[ingredient.name],
                       existing.confidence >= ingredient.confidence {
                        continue
                    }
                    detectedIngredients[ingredient.name] = ingredient
                }

                // Limit to max ingredients
                if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
                    break
                }
            } catch {
                print("Error processing frame: \(error)")
                continue
            }
        }

        disconnectWebSocket()

        return Array(detectedIngredients.values)
            .filter { $0.confidence >= AppConfig.minConfidenceThreshold }
            .sorted { $0.confidence > $1.confidence }
    }

    /// Single-frame detection via the REST endpoint (no WebSocket needed).
    func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        try requireAPIKey()
        return try await detectIngredientsViaREST(pixelBuffer)
    }

    /// Monitors the stream until the model reports completion with
    /// confidence > 0.8, or the stream ends; returns the latest progress.
    func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
        try requireAPIKey()

        // Connect to WebSocket for real-time monitoring
        try await connectWebSocket()

        var latestProgress = CookingProgress(isComplete: false, confidence: 0.0, feedback: "Analyzing...")

        // Monitor frames for cooking completion
        for await pixelBuffer in stream {
            do {
                let progress = try await analyzeCookingFrame(pixelBuffer, step: step)
                latestProgress = progress

                if progress.isComplete && progress.confidence > 0.8 {
                    disconnectWebSocket()
                    return progress
                }
            } catch {
                print("Error analyzing cooking frame: \(error)")
                continue
            }
        }

        disconnectWebSocket()
        return latestProgress
    }

    // MARK: - Private Helper Methods

    /// Throws if the API key is still the unconfigured placeholder.
    private func requireAPIKey() throws {
        guard apiKey != Self.placeholderAPIKey else {
            throw VisionServiceError.apiKeyMissing
        }
    }

    private func connectWebSocket() async throws {
        var request = URLRequest(url: webSocketURL)
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")

        webSocketTask = session.webSocketTask(with: request)
        webSocketTask?.resume()

        // Wait for connection. NOTE(review): a fixed delay is fragile —
        // consider confirming the handshake via an initial server message.
        try await Task.sleep(for: .milliseconds(500))
    }

    private func disconnectWebSocket() {
        webSocketTask?.cancel(with: .goingAway, reason: nil)
        webSocketTask = nil
    }

    /// Encodes `request` as JSON and sends it over the open WebSocket.
    /// `JSONEncoder` output is always valid UTF-8, so the lossless
    /// `String(decoding:as:)` replaces the previous force-unwrap of
    /// `String(data:encoding:)!`.
    private func send(_ request: OvershootRequest) async throws {
        let messageData = try JSONEncoder().encode(request)
        let messageString = String(decoding: messageData, as: UTF8.self)
        try await webSocketTask?.send(.string(messageString))
    }

    /// Sends one frame for ingredient detection and parses the response.
    private func processFrame(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
            throw VisionServiceError.invalidResponse
        }

        try await send(OvershootRequest(
            type: "detect_ingredients",
            image: imageData.base64EncodedString(),
            timestamp: Date().timeIntervalSince1970
        ))

        guard let response = try await receiveWebSocketMessage() else {
            return []
        }
        return parseIngredients(from: response)
    }

    /// Sends one frame for cooking-progress analysis, passing the current
    /// recipe `step` as context.
    private func analyzeCookingFrame(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
            throw VisionServiceError.invalidResponse
        }

        try await send(OvershootRequest(
            type: "analyze_cooking",
            image: imageData.base64EncodedString(),
            timestamp: Date().timeIntervalSince1970,
            context: step
        ))

        guard let response = try await receiveWebSocketMessage() else {
            return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No response")
        }
        return parseCookingProgress(from: response)
    }

    /// Receives one message and best-effort decodes it; malformed payloads
    /// yield `nil` rather than an error (deliberate: frames are skippable).
    private func receiveWebSocketMessage() async throws -> OvershootResponse? {
        guard let message = try await webSocketTask?.receive() else {
            return nil
        }

        switch message {
        case .string(let text):
            guard let data = text.data(using: .utf8) else { return nil }
            return try? JSONDecoder().decode(OvershootResponse.self, from: data)
        case .data(let data):
            return try? JSONDecoder().decode(OvershootResponse.self, from: data)
        @unknown default:
            return nil
        }
    }

    /// Fallback REST API implementation used for single-frame detection.
    private func detectIngredientsViaREST(_ pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
        guard let imageData = pixelBufferToJPEG(pixelBuffer) else {
            throw VisionServiceError.invalidResponse
        }

        var request = URLRequest(url: URL(string: "https://api.overshoot.ai/v1/detect")!)
        request.httpMethod = "POST"
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")

        let requestBody = OvershootRequest(
            type: "detect_ingredients",
            image: imageData.base64EncodedString(),
            timestamp: Date().timeIntervalSince1970
        )
        request.httpBody = try JSONEncoder().encode(requestBody)

        let (data, _) = try await session.data(for: request)
        let response = try JSONDecoder().decode(OvershootResponse.self, from: data)
        return parseIngredients(from: response)
    }

    /// Maps raw API detections into `Ingredient` values.
    private func parseIngredients(from response: OvershootResponse) -> [Ingredient] {
        guard let detections = response.detections else { return [] }

        return detections.map { detection in
            Ingredient(
                name: detection.label,
                estimatedQuantity: detection.quantity ?? "Unknown",
                confidence: detection.confidence
            )
        }
    }

    /// Maps a raw response into `CookingProgress`, defaulting absent fields.
    private func parseCookingProgress(from response: OvershootResponse) -> CookingProgress {
        CookingProgress(
            isComplete: response.isComplete ?? false,
            confidence: response.confidence ?? 0.0,
            feedback: response.feedback ?? "Processing..."
        )
    }

    /// Converts a pixel buffer to JPEG data (quality 0.8); returns `nil`
    /// when Core Image cannot render the buffer.
    private func pixelBufferToJPEG(_ pixelBuffer: CVPixelBuffer) -> Data? {
        let ciImage = CIImage(cvPixelBuffer: pixelBuffer)

        guard let cgImage = ciContext.createCGImage(ciImage, from: ciImage.extent) else {
            return nil
        }

        let uiImage = UIImage(cgImage: cgImage)
        return uiImage.jpegData(compressionQuality: 0.8)
    }
}
|
|
||||||
|
|
||||||
// MARK: - Overshoot API Models
|
|
||||||
|
|
||||||
/// Request payload sent to the Overshoot API (WebSocket or REST).
/// Field order is preserved: the synthesized memberwise initializer's
/// parameter order is part of this type's interface.
private struct OvershootRequest: Codable {
    // Operation selector, e.g. "detect_ingredients" / "analyze_cooking".
    let type: String
    // Base64-encoded JPEG frame.
    let image: String
    // Client-side capture time (seconds since 1970).
    let timestamp: TimeInterval
    // Optional free-text context (e.g. the current recipe step);
    // `var` + Optional keeps it omittable in the memberwise initializer.
    var context: String?
}
|
|
||||||
|
|
||||||
/// Response payload from the Overshoot API. All fields are optional
/// because the same shape serves both detection responses (`detections`)
/// and cooking-progress responses (`isComplete`/`confidence`/`feedback`).
private struct OvershootResponse: Codable {
    // Present on "detect_ingredients" responses.
    let detections: [Detection]?
    // Present on "analyze_cooking" responses.
    let isComplete: Bool?
    let confidence: Double?
    let feedback: String?

    /// One detected object in a frame.
    struct Detection: Codable {
        let label: String
        let confidence: Double
        // Human-readable quantity estimate, when the model provides one.
        let quantity: String?
        // Location in the frame, when the model provides one.
        let boundingBox: BoundingBox?
    }

    /// Axis-aligned box locating a detection within the frame.
    struct BoundingBox: Codable {
        let x: Double
        let y: Double
        let width: Double
        let height: Double
    }
}
|
|
||||||
Reference in New Issue
Block a user