gemini stuff?
This commit is contained in:
@@ -7,6 +7,11 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Selects which vision backend performs ingredient detection.
enum ScanningMode: String, CaseIterable {
    /// Image analysis through the Gemini API (recommended).
    case geminiVision
    /// Spatial scanning through ARKit (future implementation).
    case arKit
}
|
||||
|
||||
enum AppConfig: Sendable {
|
||||
// MARK: - Google Gemini API
|
||||
@@ -22,13 +27,24 @@ enum AppConfig: Sendable {
|
||||
/// 2. Add it to the Xcode project root
|
||||
/// 3. Ensure it's added to the target
|
||||
|
||||
// MARK: - AR Configuration
|
||||
/// Enable AR-based scanning features
|
||||
static let enableARScanning = true
|
||||
// MARK: - Scanning Configuration
|
||||
|
||||
/// Current scanning mode - change this to switch between vision implementations
|
||||
/// Options: .geminiVision (uses Gemini API), .arKit (uses ARKit - future)
|
||||
static let scanningMode: ScanningMode = .geminiVision
|
||||
|
||||
/// Enable AR-based scanning features (legacy flag, use scanningMode instead)
|
||||
static let enableARScanning = false
|
||||
|
||||
// MARK: - Feature Flags
|
||||
static let enableRealTimeDetection = true
|
||||
static let enableCookingMode = true
|
||||
static let maxIngredientsPerScan = 50
|
||||
static let minConfidenceThreshold = 0.5
|
||||
|
||||
// MARK: - Scanning Settings
|
||||
/// How often to send frames to Gemini (in seconds)
|
||||
static let geminiFrameInterval: Double = 1.0
|
||||
/// Maximum scan duration before auto-stop (in seconds)
|
||||
static let maxScanDuration: Double = 60.0
|
||||
}
|
||||
|
||||
@@ -7,24 +7,41 @@
|
||||
|
||||
import Foundation
|
||||
|
||||
/// One candidate identification for a detected ingredient, paired with a confidence value.
struct IngredientGuess: Identifiable, Codable, Equatable {
    let name: String
    let confidence: Double

    /// The guess name doubles as its identity.
    var id: String {
        return name
    }
}
|
||||
|
||||
struct Ingredient: Identifiable, Codable, Equatable {
|
||||
let id: String
|
||||
var name: String
|
||||
var estimatedQuantity: String
|
||||
var confidence: Double
|
||||
|
||||
/// Top 3 guesses for what this ingredient might be (from AI detection)
|
||||
var guesses: [IngredientGuess]
|
||||
|
||||
init(id: String = UUID().uuidString,
|
||||
name: String,
|
||||
estimatedQuantity: String,
|
||||
confidence: Double = 1.0) {
|
||||
confidence: Double = 1.0,
|
||||
guesses: [IngredientGuess] = []) {
|
||||
self.id = id
|
||||
self.name = name
|
||||
self.estimatedQuantity = estimatedQuantity
|
||||
self.confidence = confidence
|
||||
self.guesses = guesses
|
||||
}
|
||||
|
||||
/// Whether the detection confidence is below the 0.7 cutoff, meaning the user should confirm the item.
var needsVerification: Bool {
    let verificationCutoff = 0.7
    return confidence < verificationCutoff
}
|
||||
|
||||
/// The name of the first entry in `guesses`, falling back to `name` when there are no guesses.
var bestGuessName: String {
    guard let topGuess = guesses.first else { return name }
    return topGuess.name
}
|
||||
}
|
||||
|
||||
503
SousChefAI/Services/GeminiVisionService.swift
Normal file
503
SousChefAI/Services/GeminiVisionService.swift
Normal file
@@ -0,0 +1,503 @@
|
||||
//
|
||||
// GeminiVisionService.swift
|
||||
// SousChefAI
|
||||
//
|
||||
// Vision service using Google Gemini Flash (currently gemini-2.0-flash-exp) for ingredient detection
|
||||
// Sends least blurry frame per second to Gemini API for analysis
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import CoreVideo
|
||||
import CoreImage
|
||||
import Accelerate
|
||||
import UIKit
|
||||
|
||||
/// Gemini-based implementation for vision ingredient detection
|
||||
final class GeminiVisionService: VisionService, @unchecked Sendable {
|
||||
|
||||
private let apiKey: String
|
||||
private let modelName = "gemini-2.0-flash-exp" // Will update to 3.0 when available
|
||||
|
||||
/// Creates the service with the Gemini API key used for every request.
/// - Parameter apiKey: Defaults to `AppConfig.geminiAPIKey`.
nonisolated init(apiKey: String = AppConfig.geminiAPIKey) {
    self.apiKey = apiKey
}
|
||||
|
||||
// MARK: - VisionService Protocol Implementation
|
||||
|
||||
/// Continuously scans a camera frame stream and accumulates the ingredients Gemini detects.
///
/// Within each `AppConfig.geminiFrameInterval` window only the sharpest frame (highest
/// blur score) is kept and sent to Gemini; its results are merged into the running list.
/// Scanning ends when the stream finishes, the task is cancelled, `AppConfig.maxScanDuration`
/// elapses, or `AppConfig.maxIngredientsPerScan` items have been collected.
///
/// - Parameter stream: Camera frames to analyze.
/// - Returns: The merged ingredients detected up to the point scanning stopped.
/// - Throws: `VisionServiceError.apiKeyMissing` when no API key is configured. Other
///   per-frame failures are logged and scanning continues.
nonisolated func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
    var allDetectedIngredients: [Ingredient] = []
    // Keep only the sharpest frame of the current window. Retaining every frame would pin
    // a whole window of pixel buffers in memory while waiting to pick one.
    var sharpestFrame: (buffer: CVPixelBuffer, blurScore: Double)?
    let scanStart = Date()
    var lastProcessTime = scanStart

    for await pixelBuffer in stream {
        // Cancellation is cooperative: return what we have instead of draining the stream.
        if Task.isCancelled { break }

        let now = Date()
        if now.timeIntervalSince(scanStart) >= AppConfig.maxScanDuration {
            break
        }

        let blurScore = calculateBlurScore(pixelBuffer)
        if let current = sharpestFrame {
            // Strict comparison keeps the earliest frame on ties, like `max(by:)` would.
            if blurScore > current.blurScore {
                sharpestFrame = (buffer: pixelBuffer, blurScore: blurScore)
            }
        } else {
            sharpestFrame = (buffer: pixelBuffer, blurScore: blurScore)
        }

        // Process once per configured interval
        if now.timeIntervalSince(lastProcessTime) >= AppConfig.geminiFrameInterval {
            if let bestFrame = sharpestFrame {
                do {
                    let ingredients = try await analyzeFrameWithGemini(
                        bestFrame.buffer,
                        existingIngredients: allDetectedIngredients
                    )

                    // Debug output
                    print("🔍 GeminiVisionService: Detected \(ingredients.count) items in frame")
                    if !ingredients.isEmpty {
                        let jsonData = try? JSONEncoder().encode(ingredients)
                        if let jsonString = jsonData.flatMap({ String(data: $0, encoding: .utf8) }) {
                            print("📋 JSON Response: \(jsonString)")
                        }
                    }

                    // Merge ingredients
                    allDetectedIngredients = mergeIngredients(existing: allDetectedIngredients, new: ingredients)
                } catch VisionServiceError.apiKeyMissing {
                    // A missing key can never succeed on a later frame; surface it instead of
                    // logging the same failure once per interval until the stream ends.
                    throw VisionServiceError.apiKeyMissing
                } catch {
                    print("⚠️ GeminiVisionService: Frame analysis failed: \(error)")
                    // Continue scanning on transient errors
                }
            }

            sharpestFrame = nil
            lastProcessTime = now
        }

        // Stop once enough ingredients have been collected
        if allDetectedIngredients.count >= AppConfig.maxIngredientsPerScan {
            break
        }
    }

    return allDetectedIngredients
}
|
||||
|
||||
/// Analyzes a single frame with Gemini, with no previously detected items supplied for de-duplication.
/// - Parameter pixelBuffer: The frame to analyze.
/// - Returns: The ingredients Gemini reports for this frame.
nonisolated func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
    let nothingDetectedYet: [Ingredient] = []
    return try await analyzeFrameWithGemini(pixelBuffer, existingIngredients: nothingDetectedYet)
}
|
||||
|
||||
/// Judges progress on a cooking step from the first frame the stream delivers.
/// - Parameters:
///   - stream: Camera frames; only the first one is consumed for now.
///   - step: The step description passed to the Gemini prompt.
/// - Returns: Gemini's assessment, or an incomplete zero-confidence result when the stream
///   ends without producing a frame.
nonisolated func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
    var firstFrame: CVPixelBuffer?

    // Take a single frame and stop iterating
    for await candidate in stream {
        firstFrame = candidate
        break
    }

    if let firstFrame {
        return try await analyzeCookingStepWithGemini(firstFrame, step: step)
    }

    return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No frame available")
}
|
||||
|
||||
// MARK: - Blur Detection (Laplacian Variance)
|
||||
|
||||
/// Scores frame sharpness as the variance of a 4-neighbour Laplacian over the luminance image.
/// Higher value = sharper image, lower value = more blurry.
///
/// Luminance is read from 32BGRA buffers (weighted RGB) or from the luma plane of 8-bit
/// bi-planar 4:2:0 YCbCr buffers. Border pixels have no Laplacian and count as zero; the
/// variance is taken over all `width * height` positions.
///
/// - Parameter pixelBuffer: Frame to score.
/// - Returns: The Laplacian variance, or `0` when the buffer is smaller than 3x3, has an
///   unsupported pixel format, or exposes no base address.
nonisolated private func calculateBlurScore(_ pixelBuffer: CVPixelBuffer) -> Double {
    CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)

    // The 3x3 kernel needs at least one interior pixel; smaller buffers would also make
    // the `1..<(height - 1)` ranges below invalid.
    guard width >= 3, height >= 3 else { return 0 }

    var grayscale = [Float](repeating: 0, count: width * height)

    // Fill the luminance image according to the actual memory layout instead of
    // assuming 4 bytes per pixel, which would read out of bounds on planar buffers.
    switch CVPixelBufferGetPixelFormatType(pixelBuffer) {
    case kCVPixelFormatType_32BGRA:
        guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else { return 0 }
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
        let pixels = baseAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            let rowStart = y * bytesPerRow
            for x in 0..<width {
                let offset = rowStart + x * 4
                let b = Float(pixels[offset])
                let g = Float(pixels[offset + 1])
                let r = Float(pixels[offset + 2])
                // Luminance formula
                grayscale[y * width + x] = 0.299 * r + 0.587 * g + 0.114 * b
            }
        }

    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
        // Plane 0 already holds one luma byte per pixel.
        guard let lumaAddress = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0) else { return 0 }
        let lumaBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0)
        let luma = lumaAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            let rowStart = y * lumaBytesPerRow
            for x in 0..<width {
                grayscale[y * width + x] = Float(luma[rowStart + x])
            }
        }

    default:
        return 0
    }

    // Apply Laplacian kernel [0, 1, 0], [1, -4, 1], [0, 1, 0] and accumulate the first two
    // moments in Double so large frames do not lose precision.
    var sum = 0.0
    var sumOfSquares = 0.0
    for y in 1..<(height - 1) {
        for x in 1..<(width - 1) {
            let idx = y * width + x
            let laplacian = -4 * grayscale[idx]
                + grayscale[idx - width]
                + grayscale[idx + width]
                + grayscale[idx - 1]
                + grayscale[idx + 1]
            let value = Double(laplacian)
            sum += value
            sumOfSquares += value * value
        }
    }

    // Variance = E[x^2] - E[x]^2; clamp tiny negative results from rounding.
    let count = Double(width * height)
    let mean = sum / count
    return max(0, sumOfSquares / count - mean * mean)
}
|
||||
|
||||
// MARK: - Gemini API Integration
|
||||
|
||||
/// Sends one frame to Gemini and returns the food items it reports as new.
///
/// The frame is JPEG-encoded and attached inline; the prompt lists the names already
/// detected so the model can skip duplicates.
///
/// - Parameters:
///   - pixelBuffer: The frame to analyze.
///   - existingIngredients: Items already detected; their names are embedded in the prompt.
/// - Returns: The ingredients parsed from the model's JSON reply.
/// - Throws: `VisionServiceError.apiKeyMissing` for an empty or placeholder key,
///   `VisionServiceError.invalidResponse` for a non-HTTP response,
///   `VisionServiceError.networkError` for a non-2xx status or an unbuildable URL, plus
///   any error from JPEG conversion, the network call, or response parsing.
nonisolated private func analyzeFrameWithGemini(
    _ pixelBuffer: CVPixelBuffer,
    existingIngredients: [Ingredient]
) async throws -> [Ingredient] {
    guard !apiKey.isEmpty, apiKey != "INSERT_KEY_HERE" else {
        throw VisionServiceError.apiKeyMissing
    }

    // Convert pixel buffer to base64 JPEG
    let imageData = try convertToJPEG(pixelBuffer)
    let base64Image = imageData.base64EncodedString()

    // Build prompt with existing ingredients for deduplication
    let existingList = existingIngredients.isEmpty
        ? "None yet"
        : existingIngredients.map { $0.name }.joined(separator: ", ")

    let prompt = """
    Analyze this image and identify all food items and ingredients visible.

    ALREADY DETECTED ITEMS (avoid duplicates, merge similar items):
    \(existingList)

    For each NEW item not already listed above, provide:
    1. The item name (normalized - e.g., "milk" not "milk 2%", "whole milk", etc.)
    2. Estimated quantity (numeric with unit, e.g., "2", "500ml", "1 dozen")
    3. Top 3 guesses for what the item might be, with confidence (0.0-1.0)

    IMPORTANT:
    - If you see "milk 2%" and "milk" is already detected, DO NOT include it
    - Use simple, normalized names (e.g., "apple" not "red delicious apple")
    - Quantity should be numeric estimates
    - Only include food items and ingredients, not containers or non-food items

    RESPOND ONLY WITH VALID JSON in this exact format (no markdown):
    {
        "items": [
            {
                "name": "normalized item name",
                "quantity": "2",
                "guesses": [
                    {"name": "primary guess", "confidence": 0.95},
                    {"name": "second guess", "confidence": 0.7},
                    {"name": "third guess", "confidence": 0.3}
                ]
            }
        ]
    }

    If no new food items are visible, return: {"items": []}
    """

    // The key travels in a header rather than the query string: it stays out of URL logs
    // and cannot make URL construction fail.
    guard let url = URL(string: "https://generativelanguage.googleapis.com/v1beta/models/\(modelName):generateContent") else {
        throw VisionServiceError.networkError(URLError(.badURL))
    }

    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.setValue(apiKey, forHTTPHeaderField: "x-goog-api-key")
    request.timeoutInterval = 30

    let requestBody: [String: Any] = [
        "contents": [
            [
                "parts": [
                    ["text": prompt],
                    [
                        "inline_data": [
                            "mime_type": "image/jpeg",
                            "data": base64Image
                        ]
                    ]
                ]
            ]
        ],
        "generationConfig": [
            "temperature": 0.2,
            "topK": 32,
            "topP": 0.95,
            "maxOutputTokens": 2048
        ]
    ]

    request.httpBody = try JSONSerialization.data(withJSONObject: requestBody)

    let (data, response) = try await URLSession.shared.data(for: request)

    guard let httpResponse = response as? HTTPURLResponse else {
        throw VisionServiceError.invalidResponse
    }

    guard (200...299).contains(httpResponse.statusCode) else {
        print("❌ Gemini API Error: HTTP \(httpResponse.statusCode)")
        if let errorText = String(data: data, encoding: .utf8) {
            print("❌ Error body: \(errorText)")
        }
        throw VisionServiceError.networkError(NSError(domain: "GeminiAPI", code: httpResponse.statusCode))
    }

    return try parseGeminiVisionResponse(data)
}
|
||||
|
||||
/// Asks Gemini whether the cooking step shown in a frame is complete.
///
/// - Parameters:
///   - pixelBuffer: The frame to analyze.
///   - step: The step description embedded in the prompt.
/// - Returns: The progress parsed from the model's JSON reply.
/// - Throws: `VisionServiceError.apiKeyMissing` for an empty or placeholder key,
///   `VisionServiceError.invalidResponse` for a non-HTTP response,
///   `VisionServiceError.networkError` for a non-2xx status or an unbuildable URL, plus
///   any error from JPEG conversion, the network call, or response parsing.
nonisolated private func analyzeCookingStepWithGemini(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
    guard !apiKey.isEmpty, apiKey != "INSERT_KEY_HERE" else {
        throw VisionServiceError.apiKeyMissing
    }

    let imageData = try convertToJPEG(pixelBuffer)
    let base64Image = imageData.base64EncodedString()

    let prompt = """
    Analyze this cooking image for the following step:
    "\(step)"

    Determine:
    1. Is this step complete? (true/false)
    2. Confidence level (0.0-1.0)
    3. Brief feedback on the current state

    RESPOND WITH JSON:
    {
        "isComplete": false,
        "confidence": 0.7,
        "feedback": "Brief description of current state"
    }
    """

    // The key travels in a header rather than the query string: it stays out of URL logs
    // and cannot make URL construction fail.
    guard let url = URL(string: "https://generativelanguage.googleapis.com/v1beta/models/\(modelName):generateContent") else {
        throw VisionServiceError.networkError(URLError(.badURL))
    }

    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.setValue(apiKey, forHTTPHeaderField: "x-goog-api-key")
    // Same bound as the ingredient request so a stalled call cannot hang cooking mode.
    request.timeoutInterval = 30

    let requestBody: [String: Any] = [
        "contents": [
            [
                "parts": [
                    ["text": prompt],
                    [
                        "inline_data": [
                            "mime_type": "image/jpeg",
                            "data": base64Image
                        ]
                    ]
                ]
            ]
        ]
    ]

    request.httpBody = try JSONSerialization.data(withJSONObject: requestBody)

    let (data, response) = try await URLSession.shared.data(for: request)

    guard let httpResponse = response as? HTTPURLResponse else {
        throw VisionServiceError.invalidResponse
    }

    // Without this check an API error body would be reported as a confusing parse failure.
    guard (200...299).contains(httpResponse.statusCode) else {
        print("❌ Gemini API Error: HTTP \(httpResponse.statusCode)")
        if let errorText = String(data: data, encoding: .utf8) {
            print("❌ Error body: \(errorText)")
        }
        throw VisionServiceError.networkError(NSError(domain: "GeminiAPI", code: httpResponse.statusCode))
    }

    return try parseCookingProgressResponse(data)
}
|
||||
|
||||
// MARK: - Image Conversion
|
||||
|
||||
/// Renders a pixel buffer to JPEG data at 0.7 compression quality.
/// - Parameter pixelBuffer: The frame to encode.
/// - Returns: The JPEG bytes.
/// - Throws: `VisionServiceError.invalidResponse` when rendering or JPEG encoding fails.
nonisolated private func convertToJPEG(_ pixelBuffer: CVPixelBuffer) throws -> Data {
    let sourceImage = CIImage(cvPixelBuffer: pixelBuffer)
    let renderer = CIContext()
    let rendered = renderer.createCGImage(sourceImage, from: sourceImage.extent)

    // 0.7 quality compresses the frame to a reasonable size for the API
    guard let rendered,
          let encoded = UIImage(cgImage: rendered).jpegData(compressionQuality: 0.7) else {
        throw VisionServiceError.invalidResponse
    }

    return encoded
}
|
||||
|
||||
// MARK: - Response Parsing
|
||||
|
||||
/// Extracts the detected ingredients from a Gemini `generateContent` response.
///
/// The model's reply text is taken from every text part of the first candidate, stripped
/// of markdown fences and surrounding prose, then decoded as `GeminiVisionResponse`.
/// Each item's confidence is that of its highest-confidence guess (0.5 when it has none),
/// and guesses are stored in descending confidence order.
///
/// - Parameter data: Raw response body.
/// - Returns: One `Ingredient` per reported item.
/// - Throws: `VisionServiceError.decodingError` when the envelope has no text or the
///   embedded JSON does not decode.
nonisolated private func parseGeminiVisionResponse(_ data: Data) throws -> [Ingredient] {
    guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
          let candidates = json["candidates"] as? [[String: Any]],
          let firstCandidate = candidates.first,
          let content = firstCandidate["content"] as? [String: Any],
          let parts = content["parts"] as? [[String: Any]] else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 0))
    }

    // A reply may be split across several text parts; join them rather than reading only the first.
    let text = parts.compactMap { $0["text"] as? String }.joined()
    guard !text.isEmpty else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 0))
    }

    // Clean up response (remove markdown if present)
    let cleanedText = text
        .replacingOccurrences(of: "```json", with: "")
        .replacingOccurrences(of: "```", with: "")
        .trimmingCharacters(in: .whitespacesAndNewlines)

    print("📝 Cleaned Gemini response: \(cleanedText)")

    // Tolerate prose before or after the JSON object by decoding only the outermost braces.
    var jsonText = cleanedText
    if let start = cleanedText.firstIndex(of: "{"),
       let end = cleanedText.lastIndex(of: "}"),
       start < end {
        jsonText = String(cleanedText[start...end])
    }

    guard let jsonData = jsonText.data(using: .utf8) else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 1))
    }

    let response: GeminiVisionResponse
    do {
        response = try JSONDecoder().decode(GeminiVisionResponse.self, from: jsonData)
    } catch {
        // Report malformed model output through the same error case as the other parse failures.
        throw VisionServiceError.decodingError(error)
    }

    return response.items.map { item in
        // Confidences are expected in 0...1; clamp anything the model reports outside it.
        let rankedGuesses = item.guesses
            .map { IngredientGuess(name: $0.name, confidence: min(max($0.confidence, 0), 1)) }
            .sorted { $0.confidence > $1.confidence }

        // The item's confidence is that of its highest-confidence guess, wherever the model listed it.
        let confidence = rankedGuesses.first?.confidence ?? 0.5

        return Ingredient(
            name: item.name,
            estimatedQuantity: item.quantity,
            confidence: confidence,
            guesses: rankedGuesses
        )
    }
}
|
||||
|
||||
/// Extracts a `CookingProgress` from a Gemini `generateContent` response.
///
/// Reads the text of the first part of the first candidate, strips markdown fences, and
/// parses the remainder as a JSON object. Missing fields fall back to `false`, `0.5`,
/// and `"Processing..."`.
///
/// - Parameter data: Raw response body.
/// - Throws: `VisionServiceError.decodingError` when the envelope or the embedded JSON
///   does not have the expected shape, or any error thrown by `JSONSerialization`.
nonisolated private func parseCookingProgressResponse(_ data: Data) throws -> CookingProgress {
    // Walk the response envelope down to the model's text
    let envelope = try JSONSerialization.jsonObject(with: data) as? [String: Any]
    let candidateList = envelope?["candidates"] as? [[String: Any]]
    let messageBody = candidateList?.first?["content"] as? [String: Any]
    let partList = messageBody?["parts"] as? [[String: Any]]

    guard let rawText = partList?.first?["text"] as? String else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 0))
    }

    // Drop markdown code fences the model may have wrapped around the JSON
    let withoutOpeningFence = rawText.replacingOccurrences(of: "```json", with: "")
    let withoutFences = withoutOpeningFence.replacingOccurrences(of: "```", with: "")
    let payloadText = withoutFences.trimmingCharacters(in: .whitespacesAndNewlines)

    guard let payload = payloadText.data(using: .utf8),
          let fields = try JSONSerialization.jsonObject(with: payload) as? [String: Any] else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 1))
    }

    let isComplete = fields["isComplete"] as? Bool ?? false
    let confidence = fields["confidence"] as? Double ?? 0.5
    let feedback = fields["feedback"] as? String ?? "Processing..."

    return CookingProgress(isComplete: isComplete, confidence: confidence, feedback: feedback)
}
|
||||
|
||||
// MARK: - Ingredient Merging
|
||||
|
||||
/// Merges newly detected ingredients into the existing list.
///
/// A new item is folded into an existing one when their lowercased names are equal or,
/// failing that, into the earliest existing item `isSimilarIngredient` accepts. A merged
/// entry keeps the existing id and name, takes the larger quantity and the higher
/// confidence, and keeps the existing guesses unless there are none. Items with no match
/// are appended.
///
/// - Parameters:
///   - existing: Ingredients collected so far.
///   - new: Ingredients from the latest frame.
/// - Returns: The merged list sorted by descending confidence; ties keep insertion order,
///   so the result is deterministic from one call to the next.
nonisolated private func mergeIngredients(existing: [Ingredient], new: [Ingredient]) -> [Ingredient] {
    // An ordered array plus a key index replaces dictionary iteration, whose order is
    // unspecified and made both the merge target and the tie order vary between runs.
    var merged: [Ingredient] = []
    merged.reserveCapacity(existing.count + new.count)
    var indexByKey: [String: Int] = [:]

    for ingredient in existing {
        let key = ingredient.name.lowercased()
        if let index = indexByKey[key] {
            // Same name listed twice: the later entry wins, as with dictionary assignment.
            merged[index] = ingredient
        } else {
            indexByKey[key] = merged.count
            merged.append(ingredient)
        }
    }

    for newIngredient in new {
        let normalizedName = newIngredient.name.lowercased()

        // Prefer an exact name match; otherwise take the earliest similar item.
        let matchIndex = indexByKey[normalizedName]
            ?? merged.firstIndex { isSimilarIngredient($0.name.lowercased(), normalizedName) }

        if let index = matchIndex {
            let current = merged[index]
            // Merge: take max quantity, higher confidence
            merged[index] = Ingredient(
                id: current.id,
                name: current.name, // Keep original name
                estimatedQuantity: mergeQuantities(current.estimatedQuantity, newIngredient.estimatedQuantity),
                confidence: max(current.confidence, newIngredient.confidence),
                guesses: current.guesses.isEmpty ? newIngredient.guesses : current.guesses
            )
        } else {
            // Add as new
            indexByKey[normalizedName] = merged.count
            merged.append(newIngredient)
        }
    }

    return merged
        .enumerated()
        .sorted { lhs, rhs in
            if lhs.element.confidence != rhs.element.confidence {
                return lhs.element.confidence > rhs.element.confidence
            }
            return lhs.offset < rhs.offset
        }
        .map { $0.element }
}
|
||||
|
||||
/// Checks if two ingredient names refer to the same item (e.g., "milk" and "milk 2%").
///
/// Names match when they are equal ignoring case, when every word of the shorter name
/// appears in the longer one (allowing simple plural forms), or when both belong to one
/// of the known variation groups. Matching is done on whole words, so "egg" does not
/// match "eggplant" and "apple" does not match "pineapple".
///
/// - Parameters:
///   - name1: First ingredient name.
///   - name2: Second ingredient name.
/// - Returns: `true` when the names are considered the same ingredient.
nonisolated private func isSimilarIngredient(_ name1: String, _ name2: String) -> Bool {
    let first = name1.lowercased()
    let second = name2.lowercased()

    // Exact match
    if first == second { return true }

    /// Treats two words as equal when they differ only by a plural ending (s, es, y -> ies).
    func isSameWord(_ a: String, _ b: String) -> Bool {
        if a == b { return true }
        let (shorter, longer) = a.count <= b.count ? (a, b) : (b, a)
        if longer == shorter + "s" || longer == shorter + "es" { return true }
        if longer.hasSuffix("ies"), shorter.hasSuffix("y") {
            return longer.dropLast(3) == shorter.dropLast(1)
        }
        return false
    }

    /// Splits a name into words on spaces, hyphens, and commas.
    func words(in name: String) -> [String] {
        name.split(whereSeparator: { $0 == " " || $0 == "-" || $0 == "," }).map(String.init)
    }

    // One name's words are all contained in the other's
    let firstWords = words(in: first)
    let secondWords = words(in: second)
    if !firstWords.isEmpty, !secondWords.isEmpty {
        let (fewer, more) = firstWords.count <= secondWords.count
            ? (firstWords, secondWords)
            : (secondWords, firstWords)
        let allWordsPresent = fewer.allSatisfy { word in
            more.contains { isSameWord(word, $0) }
        }
        if allWordsPresent { return true }
    }

    // Common ingredient variations
    let variations: [[String]] = [
        ["milk", "whole milk", "2% milk", "skim milk", "milk 2%"],
        ["egg", "eggs", "large eggs"],
        ["butter", "unsalted butter", "salted butter"],
        ["cheese", "cheddar", "cheddar cheese"],
        ["chicken", "chicken breast", "chicken thigh"],
        ["onion", "onions", "yellow onion", "white onion"],
        ["tomato", "tomatoes", "cherry tomatoes"],
        ["potato", "potatoes", "russet potato"]
    ]

    return variations.contains { group in
        group.contains(first) && group.contains(second)
    }
}
|
||||
|
||||
/// Picks whichever quantity string carries the larger leading number.
///
/// Only the first number found in each string is compared; units are not interpreted.
/// A string without a number counts as 0, and ties keep `q1`.
nonisolated private func mergeQuantities(_ q1: String, _ q2: String) -> String {
    let firstValue = extractNumber(from: q1) ?? 0
    let secondValue = extractNumber(from: q2) ?? 0

    // Only a strictly larger second value replaces the first
    if secondValue > firstValue {
        return q2
    }
    return q1
}
|
||||
|
||||
/// Finds the first decimal number in a string.
///
/// Accepts forms like "2", "1.5" and ".5". Stray punctuation is skipped, so
/// "approx. 2 cups" yields 2, and "1.2.3" yields 1.2 rather than failing to parse.
///
/// - Parameter string: Text such as "500ml" or "1 dozen".
/// - Returns: The first number found, or `nil` when the string contains no digits.
nonisolated private func extractNumber(from string: String) -> Double? {
    // Digits with an optional fractional part, or a bare fraction such as ".5". The
    // previous `[\d.]+` also matched lone dots and multi-dot runs that `Double` rejects.
    let pattern = #"[0-9]+(?:\.[0-9]+)?|\.[0-9]+"#
    guard let regex = try? NSRegularExpression(pattern: pattern),
          let match = regex.firstMatch(in: string, range: NSRange(string.startIndex..., in: string)),
          let range = Range(match.range, in: string) else {
        return nil
    }

    let matched = String(string[range])
    // Give a bare fraction a leading zero so parsing does not depend on ".5" being accepted.
    return Double(matched.hasPrefix(".") ? "0" + matched : matched)
}
|
||||
}
|
||||
|
||||
// MARK: - Response Models
|
||||
|
||||
/// Top-level JSON object the vision prompt asks Gemini to return.
private struct GeminiVisionResponse: Codable {
    let items: [GeminiVisionItem]
}

/// One detected item in the model's reply.
///
/// Decoding is lenient about the two deviations a language model commonly makes:
/// `quantity` may arrive as a JSON number instead of a string, and `guesses` may be
/// omitted or null (treated as empty).
private struct GeminiVisionItem: Codable {
    let name: String
    let quantity: String
    let guesses: [GeminiGuess]

    private enum CodingKeys: String, CodingKey {
        case name
        case quantity
        case guesses
    }

    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        name = try container.decode(String.self, forKey: .name)

        // Accept "2", 2 and 1.5 alike; a single odd item must not fail the whole response.
        if let text = try? container.decode(String.self, forKey: .quantity) {
            quantity = text
        } else if let whole = try? container.decode(Int.self, forKey: .quantity) {
            quantity = String(whole)
        } else {
            quantity = String(try container.decode(Double.self, forKey: .quantity))
        }

        guesses = try container.decodeIfPresent([GeminiGuess].self, forKey: .guesses) ?? []
    }
}

/// One candidate name for an item, with the model's confidence in it.
private struct GeminiGuess: Codable {
    let name: String
    let confidence: Double
}
|
||||
@@ -19,13 +19,29 @@ final class ScannerViewModel: ObservableObject {
|
||||
@Published var error: Error?
|
||||
@Published var scanProgress: String = "Ready to scan"
|
||||
|
||||
/// The most recently detected new ingredient (for banner display)
|
||||
@Published var latestNewIngredient: Ingredient?
|
||||
|
||||
private let visionService: VisionService
|
||||
private let cameraManager: CameraManager
|
||||
private var scanTask: Task<Void, Never>?
|
||||
|
||||
nonisolated init(visionService: VisionService = ARVisionService(),
|
||||
cameraManager: CameraManager = CameraManager()) {
|
||||
/// Callback when a new ingredient is detected (not a duplicate)
|
||||
var onNewIngredientDetected: ((Ingredient) -> Void)?
|
||||
|
||||
nonisolated init(cameraManager: CameraManager = CameraManager()) {
|
||||
print("📱 ScannerViewModel.init() - Creating ViewModel at \(Date())")
|
||||
|
||||
// Select vision service based on configuration
|
||||
let visionService: VisionService = switch AppConfig.scanningMode {
|
||||
case .geminiVision:
|
||||
GeminiVisionService()
|
||||
case .arKit:
|
||||
ARVisionService()
|
||||
}
|
||||
|
||||
print("📱 ScannerViewModel.init() - Using \(AppConfig.scanningMode.rawValue) scanning mode")
|
||||
|
||||
self.visionService = visionService
|
||||
self.cameraManager = cameraManager
|
||||
}
|
||||
@@ -62,19 +78,80 @@ final class ScannerViewModel: ObservableObject {
|
||||
guard !isScanning else { return }
|
||||
|
||||
isScanning = true
|
||||
detectedIngredients.removeAll()
|
||||
scanProgress = "Scanning ingredients..."
|
||||
print("📱 ScannerViewModel.startScanning() - Started with \(AppConfig.scanningMode.rawValue) mode")
|
||||
|
||||
scanTask = Task {
|
||||
let startTime = Date()
|
||||
|
||||
do {
|
||||
let stream = cameraManager.frameStream()
|
||||
let ingredients = try await visionService.detectIngredients(from: stream)
|
||||
|
||||
// For Gemini mode, we use real-time detection with callbacks
|
||||
if AppConfig.scanningMode == .geminiVision {
|
||||
// Process frames continuously until stopped or timeout
|
||||
var lastProcessTime = Date()
|
||||
var currentSecondFrames: [(buffer: CVPixelBuffer, timestamp: Date)] = []
|
||||
|
||||
for await frame in stream {
|
||||
guard !Task.isCancelled else { break }
|
||||
|
||||
// Check timeout
|
||||
if Date().timeIntervalSince(startTime) >= AppConfig.maxScanDuration {
|
||||
print("📱 ScannerViewModel: Max scan duration reached")
|
||||
break
|
||||
}
|
||||
|
||||
currentSecondFrames.append((buffer: frame, timestamp: Date()))
|
||||
|
||||
// Process every second
|
||||
let now = Date()
|
||||
if now.timeIntervalSince(lastProcessTime) >= AppConfig.geminiFrameInterval {
|
||||
// Pick the frame from the middle of the batch (reasonable approximation)
|
||||
if let bestFrame = currentSecondFrames[safe: currentSecondFrames.count / 2]?.buffer {
|
||||
do {
|
||||
let previousCount = detectedIngredients.count
|
||||
let ingredients = try await visionService.detectIngredients(from: bestFrame)
|
||||
|
||||
// Find new ingredients before merging
|
||||
let newIngredients = findNewIngredients(ingredients)
|
||||
|
||||
// Merge with existing
|
||||
updateDetectedIngredients(ingredients, mergeMode: true)
|
||||
|
||||
// Notify about new ingredients
|
||||
for newIngredient in newIngredients {
|
||||
print("🆕 New ingredient detected: \(newIngredient.name)")
|
||||
latestNewIngredient = newIngredient
|
||||
onNewIngredientDetected?(newIngredient)
|
||||
}
|
||||
|
||||
scanProgress = "Found \(detectedIngredients.count) items..."
|
||||
} catch {
|
||||
print("⚠️ Frame analysis error: \(error)")
|
||||
// Continue scanning on errors
|
||||
}
|
||||
}
|
||||
|
||||
currentSecondFrames.removeAll()
|
||||
lastProcessTime = now
|
||||
}
|
||||
|
||||
// Stop if we have enough ingredients
|
||||
if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// AR mode: use batch detection
|
||||
let ingredients = try await visionService.detectIngredients(from: stream)
|
||||
updateDetectedIngredients(ingredients)
|
||||
scanProgress = "Scan complete! Found \(ingredients.count) ingredients"
|
||||
}
|
||||
|
||||
scanProgress = "Scan complete! Found \(detectedIngredients.count) ingredients"
|
||||
} catch {
|
||||
self.error = error
|
||||
scanProgress = "Scan failed"
|
||||
scanProgress = "Scan failed: \(error.localizedDescription)"
|
||||
}
|
||||
|
||||
isScanning = false
|
||||
@@ -123,20 +200,58 @@ final class ScannerViewModel: ObservableObject {
|
||||
|
||||
// MARK: - Ingredient Management
|
||||
|
||||
/// Returns the candidates that match none of the ingredients already in `detectedIngredients`.
private func findNewIngredients(_ newIngredients: [Ingredient]) -> [Ingredient] {
    var unseen: [Ingredient] = []

    for candidate in newIngredients {
        let alreadyKnown = detectedIngredients.contains { known in
            isSimilarIngredient(known.name, candidate.name)
        }
        if !alreadyKnown {
            unseen.append(candidate)
        }
    }

    return unseen
}
|
||||
|
||||
/// Checks if two ingredient names refer to the same item.
///
/// Names match when they are equal ignoring case, or when every word of the shorter name
/// appears in the longer one (allowing simple plural forms). Matching is done on whole
/// words, so "milk" matches "milk 2%" but "egg" does not match "eggplant".
///
/// - Parameters:
///   - name1: First ingredient name.
///   - name2: Second ingredient name.
/// - Returns: `true` when the names are considered the same ingredient.
private func isSimilarIngredient(_ name1: String, _ name2: String) -> Bool {
    let n1 = name1.lowercased()
    let n2 = name2.lowercased()

    // Exact match
    if n1 == n2 { return true }

    /// Treats two words as equal when they differ only by a plural ending (s, es, y -> ies).
    func isSameWord(_ a: String, _ b: String) -> Bool {
        if a == b { return true }
        let (shorter, longer) = a.count <= b.count ? (a, b) : (b, a)
        if longer == shorter + "s" || longer == shorter + "es" { return true }
        if longer.hasSuffix("ies"), shorter.hasSuffix("y") {
            return longer.dropLast(3) == shorter.dropLast(1)
        }
        return false
    }

    /// Splits a name into words on spaces, hyphens, and commas.
    func words(in name: String) -> [String] {
        name.split(whereSeparator: { $0 == " " || $0 == "-" || $0 == "," }).map(String.init)
    }

    // One name's words are all contained in the other's
    let firstWords = words(in: n1)
    let secondWords = words(in: n2)
    guard !firstWords.isEmpty, !secondWords.isEmpty else { return false }

    let (fewer, more) = firstWords.count <= secondWords.count
        ? (firstWords, secondWords)
        : (secondWords, firstWords)

    return fewer.allSatisfy { word in
        more.contains { isSameWord(word, $0) }
    }
}
|
||||
|
||||
private func updateDetectedIngredients(_ newIngredients: [Ingredient], mergeMode: Bool = false) {
|
||||
if mergeMode {
|
||||
// Merge with existing ingredients, keeping higher confidence
|
||||
// Merge with existing ingredients, keeping higher confidence and max quantity
|
||||
var merged = detectedIngredients.reduce(into: [String: Ingredient]()) { dict, ingredient in
|
||||
dict[ingredient.name] = ingredient
|
||||
dict[ingredient.name.lowercased()] = ingredient
|
||||
}
|
||||
|
||||
for ingredient in newIngredients {
|
||||
if let existing = merged[ingredient.name] {
|
||||
if ingredient.confidence > existing.confidence {
|
||||
merged[ingredient.name] = ingredient
|
||||
let normalizedName = ingredient.name.lowercased()
|
||||
|
||||
// Check for similar existing items
|
||||
let similarKey = merged.keys.first { existingKey in
|
||||
isSimilarIngredient(existingKey, normalizedName)
|
||||
}
|
||||
|
||||
if let key = similarKey, let existing = merged[key] {
|
||||
// Merge: take max quantity, higher confidence
|
||||
let mergedQuantity = mergeQuantities(existing.estimatedQuantity, ingredient.estimatedQuantity)
|
||||
let mergedConfidence = max(existing.confidence, ingredient.confidence)
|
||||
|
||||
merged[key] = Ingredient(
|
||||
id: existing.id,
|
||||
name: existing.name,
|
||||
estimatedQuantity: mergedQuantity,
|
||||
confidence: mergedConfidence,
|
||||
guesses: existing.guesses.isEmpty ? ingredient.guesses : existing.guesses
|
||||
)
|
||||
} else {
|
||||
merged[ingredient.name] = ingredient
|
||||
merged[normalizedName] = ingredient
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,6 +261,23 @@ final class ScannerViewModel: ObservableObject {
|
||||
}
|
||||
}
|
||||
|
||||
/// Picks whichever quantity string carries the larger leading number.
///
/// Only the first number found in each string is compared; units are not interpreted.
/// A string without a number counts as 0, and ties keep `q1`.
private func mergeQuantities(_ q1: String, _ q2: String) -> String {
    let firstValue = extractNumber(from: q1) ?? 0
    let secondValue = extractNumber(from: q2) ?? 0

    // Only a strictly larger second value replaces the first
    if secondValue > firstValue {
        return q2
    }
    return q1
}
|
||||
|
||||
/// Finds the first decimal number in a string.
///
/// Accepts forms like "2", "1.5" and ".5". Stray punctuation is skipped, so
/// "approx. 2 cups" yields 2, and "1.2.3" yields 1.2 rather than failing to parse.
///
/// - Parameter string: Text such as "500ml" or "1 dozen".
/// - Returns: The first number found, or `nil` when the string contains no digits.
private func extractNumber(from string: String) -> Double? {
    // Digits with an optional fractional part, or a bare fraction such as ".5". The
    // previous `[\d.]+` also matched lone dots and multi-dot runs that `Double` rejects.
    let pattern = #"[0-9]+(?:\.[0-9]+)?|\.[0-9]+"#
    guard let regex = try? NSRegularExpression(pattern: pattern),
          let match = regex.firstMatch(in: string, range: NSRange(string.startIndex..., in: string)),
          let range = Range(match.range, in: string) else {
        return nil
    }

    let matched = String(string[range])
    // Give a bare fraction a leading zero so parsing does not depend on ".5" being accepted.
    return Double(matched.hasPrefix(".") ? "0" + matched : matched)
}
|
||||
|
||||
func addIngredient(_ ingredient: Ingredient) {
|
||||
if !detectedIngredients.contains(where: { $0.id == ingredient.id }) {
|
||||
detectedIngredients.append(ingredient)
|
||||
@@ -181,4 +313,52 @@ final class ScannerViewModel: ObservableObject {
|
||||
await cameraManager.cleanup()
|
||||
print("📱 ScannerViewModel.cleanup() - ✅ Cleanup complete")
|
||||
}
|
||||
|
||||
// MARK: - Local Persistence
|
||||
|
||||
/// Persists `detectedIngredients` as JSON in `UserDefaults` under "savedIngredients".
/// Encoding failures are logged and nothing is written.
/// TODO: Migrate to FirestoreRepository when Firebase is configured
/// To migrate: Replace this method with a call to FirestoreRepository.saveIngredients()
func saveIngredientsLocally() {
    let encoded: Data
    do {
        encoded = try JSONEncoder().encode(detectedIngredients)
    } catch {
        print("❌ Failed to save ingredients: \(error)")
        return
    }

    UserDefaults.standard.set(encoded, forKey: "savedIngredients")
    print("💾 Saved \(detectedIngredients.count) ingredients locally")
}
|
||||
|
||||
/// Restores `detectedIngredients` from the JSON stored in `UserDefaults` under "savedIngredients".
/// When nothing is stored or decoding fails, the current list is left untouched and the
/// outcome is logged.
/// TODO: Migrate to FirestoreRepository when Firebase is configured
/// To migrate: Replace this method with a call to FirestoreRepository.loadIngredients()
func loadIngredientsLocally() {
    if let stored = UserDefaults.standard.data(forKey: "savedIngredients") {
        do {
            let restored = try JSONDecoder().decode([Ingredient].self, from: stored)
            detectedIngredients = restored
            print("📂 Loaded \(detectedIngredients.count) ingredients from local storage")
        } catch {
            print("❌ Failed to load ingredients: \(error)")
        }
    } else {
        print("📂 No saved ingredients found")
    }
}
|
||||
|
||||
/// Empties the in-memory ingredient list and deletes the "savedIngredients" entry from `UserDefaults`.
func clearSavedIngredients() {
    let defaults = UserDefaults.standard

    detectedIngredients.removeAll()
    defaults.removeObject(forKey: "savedIngredients")

    print("🗑️ Cleared all saved ingredients")
}
|
||||
}
|
||||
|
||||
// MARK: - Array Safe Subscript Extension
|
||||
|
||||
extension Collection {
    /// Bounds-checked element access: yields `nil` instead of trapping when `index` is not a valid index.
    subscript(safe index: Index) -> Element? {
        guard indices.contains(index) else {
            return nil
        }
        return self[index]
    }
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
import SwiftUI
|
||||
import ARKit
|
||||
import RealityKit
|
||||
import AVFoundation
|
||||
|
||||
struct ScannerView: View {
|
||||
@StateObject private var viewModel = ScannerViewModel()
|
||||
@@ -18,6 +19,11 @@ struct ScannerView: View {
|
||||
@State private var showARView = false
|
||||
@State private var previewLayer: AVCaptureVideoPreviewLayer?
|
||||
|
||||
// Banner notification state
|
||||
@State private var showBanner = false
|
||||
@State private var bannerIngredient: Ingredient?
|
||||
@State private var bannerTask: Task<Void, Never>?
|
||||
|
||||
/// Creates the view and prints a log line carrying the creation timestamp.
init() {
    let createdAt = Date()
    print("🔵 ScannerView.init() - View initialized at \(createdAt)")
}
|
||||
@@ -49,6 +55,14 @@ struct ScannerView: View {
|
||||
|
||||
// Overlay UI
|
||||
VStack {
|
||||
// New ingredient banner (top of screen)
|
||||
if showBanner, let ingredient = bannerIngredient {
|
||||
NewIngredientBanner(ingredient: ingredient)
|
||||
.transition(.move(edge: .top).combined(with: .opacity))
|
||||
.padding(.horizontal)
|
||||
.padding(.top, 8)
|
||||
}
|
||||
|
||||
// Top status bar
|
||||
statusBar
|
||||
.padding()
|
||||
@@ -71,7 +85,7 @@ struct ScannerView: View {
|
||||
.padding()
|
||||
}
|
||||
}
|
||||
.navigationTitle(showARView ? "AR Scanner" : "Camera Preview")
|
||||
.navigationTitle(showARView ? "AR Scanner" : "Ingredient Scanner")
|
||||
.navigationBarTitleDisplayMode(.inline)
|
||||
.toolbar {
|
||||
ToolbarItem(placement: .navigationBarTrailing) {
|
||||
@@ -84,6 +98,15 @@ struct ScannerView: View {
|
||||
}
|
||||
.task {
|
||||
print("🔵 ScannerView.task - Task started at \(Date())")
|
||||
|
||||
// Load any previously saved ingredients
|
||||
viewModel.loadIngredientsLocally()
|
||||
|
||||
// Setup new ingredient notification handler
|
||||
viewModel.onNewIngredientDetected = { [self] ingredient in
|
||||
showNewIngredientBanner(ingredient)
|
||||
}
|
||||
|
||||
if !showARView {
|
||||
print("🔵 ScannerView.task - Calling setupCamera()")
|
||||
await viewModel.setupCamera()
|
||||
@@ -96,10 +119,17 @@ struct ScannerView: View {
|
||||
}
|
||||
.onDisappear {
|
||||
print("🔵 ScannerView.onDisappear - Cleaning up at \(Date())")
|
||||
bannerTask?.cancel()
|
||||
Task {
|
||||
await viewModel.cleanup()
|
||||
}
|
||||
}
|
||||
.onChange(of: viewModel.isScanning) { wasScanning, isScanning in
|
||||
// When scanning stops, save ingredients and optionally navigate
|
||||
if wasScanning && !isScanning && !viewModel.detectedIngredients.isEmpty {
|
||||
viewModel.saveIngredientsLocally()
|
||||
}
|
||||
}
|
||||
.alert("Camera Error", isPresented: .constant(viewModel.error != nil)) {
|
||||
Button("OK") {
|
||||
viewModel.error = nil
|
||||
@@ -120,6 +150,28 @@ struct ScannerView: View {
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Banner Management
|
||||
|
||||
/// Presents the new-ingredient banner for `ingredient` and schedules its dismissal.
///
/// Any previously scheduled dismissal is cancelled first, so showing a second
/// banner restarts the one-second timer rather than inheriting the old one.
///
/// - Parameter ingredient: The ingredient to display in the banner.
private func showNewIngredientBanner(_ ingredient: Ingredient) {
    // Drop the pending auto-dismiss, if there is one.
    bannerTask?.cancel()

    // Animate the banner in with the new content.
    let presentAnimation = Animation.spring(response: 0.3)
    withAnimation(presentAnimation) {
        bannerIngredient = ingredient
        showBanner = true
    }

    // Hide the banner one second later unless this task was cancelled meanwhile.
    bannerTask = Task { @MainActor in
        try? await Task.sleep(for: .seconds(1))
        if Task.isCancelled {
            return
        }
        let dismissAnimation = Animation.easeOut(duration: 0.3)
        withAnimation(dismissAnimation) {
            showBanner = false
        }
    }
}
|
||||
|
||||
// MARK: - UI Components
|
||||
|
||||
private var statusBar: some View {
|
||||
@@ -353,6 +405,58 @@ struct ManualIngredientEntry: View {
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - New Ingredient Banner
|
||||
|
||||
/// A green, rounded banner announcing a newly detected ingredient.
///
/// Shows a plus icon, the capitalized ingredient name, the estimated quantity
/// (only when that string is non-empty) and a capsule badge with the detection
/// confidence as a whole-number percentage.
struct NewIngredientBanner: View {
    /// The ingredient whose name, quantity and confidence are displayed.
    let ingredient: Ingredient

    /// Confidence scaled to a whole-number percentage.
    ///
    /// The scaled value is rounded to the nearest integer before conversion.
    /// `Int(_:)` truncates toward zero, so a confidence of 0.29 — which becomes
    /// 28.999999999999996 after multiplying by 100 — would otherwise be shown
    /// as 28 instead of 29.
    private var confidencePercent: Int {
        Int((ingredient.confidence * 100).rounded())
    }

    var body: some View {
        HStack(spacing: 12) {
            Image(systemName: "plus.circle.fill")
                .foregroundStyle(.white)
                .font(.title2)

            VStack(alignment: .leading, spacing: 2) {
                Text("New Item Detected")
                    .font(.caption)
                    .foregroundStyle(.white.opacity(0.8))

                Text(ingredient.name.capitalized)
                    .font(.headline)
                    .foregroundStyle(.white)

                // The quantity row is omitted entirely when no estimate is available.
                if !ingredient.estimatedQuantity.isEmpty {
                    Text("Qty: \(ingredient.estimatedQuantity)")
                        .font(.caption)
                        .foregroundStyle(.white.opacity(0.8))
                }
            }

            Spacer()

            // Confidence indicator
            Text("\(confidencePercent)%")
                .font(.caption)
                .fontWeight(.semibold)
                .foregroundStyle(.white)
                .padding(.horizontal, 8)
                .padding(.vertical, 4)
                .background(.white.opacity(0.2))
                .clipShape(Capsule())
        }
        .padding()
        .background(
            LinearGradient(
                colors: [Color.green, Color.green.opacity(0.8)],
                startPoint: .leading,
                endPoint: .trailing
            )
        )
        .clipShape(RoundedRectangle(cornerRadius: 12))
        .shadow(color: .black.opacity(0.2), radius: 8, y: 4)
    }
}
|
||||
|
||||
// Preview declaration: builds a ScannerView using its parameterless initializer.
#Preview {
|
||||
ScannerView()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user