gemini stuff?

This commit is contained in:
2026-04-04 15:22:57 -05:00
parent 3bfd4a6f4c
commit 098a6c4214
5 changed files with 842 additions and 22 deletions

View File

@@ -7,6 +7,11 @@
import Foundation import Foundation
/// Selects which vision implementation performs ingredient detection.
///
/// The raw value of each case is its name; `allCases` lists them in declaration order.
enum ScanningMode: String, CaseIterable {
    /// Uses the Gemini API for image analysis (recommended).
    case geminiVision
    /// Uses ARKit for spatial scanning (future implementation).
    case arKit
}
enum AppConfig: Sendable { enum AppConfig: Sendable {
// MARK: - Google Gemini API // MARK: - Google Gemini API
@@ -22,13 +27,24 @@ enum AppConfig: Sendable {
/// 2. Add it to the Xcode project root /// 2. Add it to the Xcode project root
/// 3. Ensure it's added to the target /// 3. Ensure it's added to the target
// MARK: - AR Configuration // MARK: - Scanning Configuration
/// Enable AR-based scanning features
static let enableARScanning = true /// Current scanning mode - change this to switch between vision implementations
/// Options: .geminiVision (uses Gemini API), .arKit (uses ARKit - future)
static let scanningMode: ScanningMode = .geminiVision
/// Enable AR-based scanning features (legacy flag, use scanningMode instead)
static let enableARScanning = false
// MARK: - Feature Flags // MARK: - Feature Flags
static let enableRealTimeDetection = true static let enableRealTimeDetection = true
static let enableCookingMode = true static let enableCookingMode = true
static let maxIngredientsPerScan = 50 static let maxIngredientsPerScan = 50
static let minConfidenceThreshold = 0.5 static let minConfidenceThreshold = 0.5
// MARK: - Scanning Settings
/// How often to send frames to Gemini (in seconds)
static let geminiFrameInterval: Double = 1.0
/// Maximum scan duration before auto-stop (in seconds)
static let maxScanDuration: Double = 60.0
} }

View File

@@ -7,24 +7,41 @@
import Foundation import Foundation
/// One candidate identification for a detected ingredient, with the confidence attached to it.
struct IngredientGuess: Identifiable, Codable, Equatable {
    /// Candidate ingredient name.
    let name: String
    /// Confidence reported for this candidate.
    let confidence: Double

    /// Identity used by `Identifiable`; it is simply the guess name.
    var id: String {
        return name
    }
}
struct Ingredient: Identifiable, Codable, Equatable { struct Ingredient: Identifiable, Codable, Equatable {
let id: String let id: String
var name: String var name: String
var estimatedQuantity: String var estimatedQuantity: String
var confidence: Double var confidence: Double
/// Top 3 guesses for what this ingredient might be (from AI detection)
var guesses: [IngredientGuess]
init(id: String = UUID().uuidString, init(id: String = UUID().uuidString,
name: String, name: String,
estimatedQuantity: String, estimatedQuantity: String,
confidence: Double = 1.0) { confidence: Double = 1.0,
guesses: [IngredientGuess] = []) {
self.id = id self.id = id
self.name = name self.name = name
self.estimatedQuantity = estimatedQuantity self.estimatedQuantity = estimatedQuantity
self.confidence = confidence self.confidence = confidence
self.guesses = guesses
} }
/// Indicates if the detection confidence is low and requires user verification /// Indicates if the detection confidence is low and requires user verification
var needsVerification: Bool { var needsVerification: Bool {
confidence < 0.7 confidence < 0.7
} }
/// Returns the best guess name, or the current name if no guesses available
var bestGuessName: String {
guesses.first?.name ?? name
}
} }

View File

@@ -0,0 +1,503 @@
//
// GeminiVisionService.swift
// SousChefAI
//
// Vision service using Google Gemini 3.0 Flash for ingredient detection
// Sends least blurry frame per second to Gemini API for analysis
//
import Foundation
import CoreVideo
import CoreImage
import Accelerate
import UIKit
/// Gemini-based implementation for vision ingredient detection
final class GeminiVisionService: VisionService, @unchecked Sendable {
private let apiKey: String
private let modelName = "gemini-2.0-flash-exp" // Will update to 3.0 when available
/// Creates the service with the API key used for every Gemini request.
///
/// - Parameter apiKey: Gemini API key; defaults to `AppConfig.geminiAPIKey`.
nonisolated init(apiKey: String = AppConfig.geminiAPIKey) {
self.apiKey = apiKey
}
// MARK: - VisionService Protocol Implementation
/// Continuously scans a frame stream and returns every ingredient detected along the way.
///
/// Each incoming frame is scored for sharpness. Once a window of
/// `AppConfig.geminiFrameInterval` seconds has elapsed, the sharpest frame seen in that window
/// is sent to Gemini and the reply is merged into the running result. A failed frame analysis
/// is logged and scanning continues.
///
/// Scanning stops when the stream ends, the task is cancelled,
/// `AppConfig.maxIngredientsPerScan` items have been collected, or
/// `AppConfig.maxScanDuration` seconds have passed.
///
/// - Parameter stream: Camera frames to analyse.
/// - Returns: The merged ingredients detected so far (possibly empty).
nonisolated func detectIngredients(from stream: AsyncStream<CVPixelBuffer>) async throws -> [Ingredient] {
    var allDetectedIngredients: [Ingredient] = []
    // Only the sharpest frame of the current window is kept. Holding a whole window of pixel
    // buffers alive is unnecessary (they are large, and a capture pipeline may stall when too
    // many of its buffers are retained).
    var sharpestFrame: (buffer: CVPixelBuffer, blurScore: Double)?
    let scanStart = Date()
    var windowStart = scanStart

    for await pixelBuffer in stream {
        // Cancellation is cooperative; stop before doing any more work.
        if Task.isCancelled { break }

        let blurScore = calculateBlurScore(pixelBuffer)
        // Strictly-greater keeps the earliest frame on ties (highest Laplacian variance = sharpest).
        if sharpestFrame.map({ blurScore > $0.blurScore }) ?? true {
            sharpestFrame = (buffer: pixelBuffer, blurScore: blurScore)
        }

        if Date().timeIntervalSince(windowStart) >= AppConfig.geminiFrameInterval {
            if let bestFrame = sharpestFrame {
                do {
                    let ingredients = try await analyzeFrameWithGemini(
                        bestFrame.buffer,
                        existingIngredients: allDetectedIngredients
                    )
                    // Debug output
                    print("🔍 GeminiVisionService: Detected \(ingredients.count) items in frame")
                    if !ingredients.isEmpty {
                        let jsonData = try? JSONEncoder().encode(ingredients)
                        if let jsonString = jsonData.flatMap({ String(data: $0, encoding: .utf8) }) {
                            print("📋 JSON Response: \(jsonString)")
                        }
                    }
                    allDetectedIngredients = mergeIngredients(existing: allDetectedIngredients, new: ingredients)
                } catch {
                    print("⚠️ GeminiVisionService: Frame analysis failed: \(error)")
                    // Continue scanning on errors
                }
            }
            sharpestFrame = nil
            // Restart the window after the request returns, so the next window collects a
            // full interval of frames instead of firing again on the very next frame.
            windowStart = Date()
        }

        if allDetectedIngredients.count >= AppConfig.maxIngredientsPerScan {
            break
        }
        if Date().timeIntervalSince(scanStart) >= AppConfig.maxScanDuration {
            break
        }
    }
    return allDetectedIngredients
}
/// Detects ingredients in a single frame, passing no previously detected items to the analysis.
nonisolated func detectIngredients(from pixelBuffer: CVPixelBuffer) async throws -> [Ingredient] {
    let nothingDetectedYet: [Ingredient] = []
    let detected = try await analyzeFrameWithGemini(pixelBuffer, existingIngredients: nothingDetectedYet)
    return detected
}
/// Evaluates progress on a cooking step from the first frame the stream delivers.
///
/// - Parameters:
///   - stream: Camera frames; only the first one received is analysed.
///   - step: Description of the cooking step being checked.
/// - Returns: Gemini's assessment, or an incomplete zero-confidence result with the feedback
///   "No frame available" when the stream yields nothing.
nonisolated func analyzeCookingProgress(from stream: AsyncStream<CVPixelBuffer>, for step: String) async throws -> CookingProgress {
    var frames = stream.makeAsyncIterator()
    // Just get one frame for now.
    if let firstFrame = await frames.next() {
        return try await analyzeCookingStepWithGemini(firstFrame, step: step)
    }
    return CookingProgress(isComplete: false, confidence: 0.0, feedback: "No frame available")
}
// MARK: - Blur Detection (Laplacian Variance)
/// Calculates a sharpness score for a frame as the variance of its Laplacian.
///
/// Higher value = sharper image, lower value = more blurry. Scores are only meaningful
/// relative to other frames of the same size and pixel format.
///
/// Supported layouts are packed 32-bit BGRA and bi-planar 4:2:0 YCbCr, whose plane 0 is read
/// directly as 8-bit luminance. Returns `0` when the buffer cannot be locked, is smaller than
/// 3x3 (the kernel needs one neighbour on every side), or uses another pixel format.
///
/// - Parameter pixelBuffer: Frame to score; it is locked read-only for the duration of the call.
/// - Returns: The Laplacian variance, never negative.
nonisolated private func calculateBlurScore(_ pixelBuffer: CVPixelBuffer) -> Double {
    guard CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly) == kCVReturnSuccess else {
        return 0
    }
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    let width = CVPixelBufferGetWidth(pixelBuffer)
    let height = CVPixelBufferGetHeight(pixelBuffer)
    // `1..<(height - 1)` below would trap for degenerate sizes.
    guard width >= 3, height >= 3 else {
        return 0
    }

    // Build a tightly packed luminance image; the indexing depends on the pixel layout,
    // so anything not explicitly handled is rejected instead of being read as BGRA.
    var grayscale = [Float](repeating: 0, count: width * height)
    switch CVPixelBufferGetPixelFormatType(pixelBuffer) {
    case kCVPixelFormatType_32BGRA:
        guard let baseAddress = CVPixelBufferGetBaseAddress(pixelBuffer) else {
            return 0
        }
        let bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
        let pixels = baseAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            for x in 0..<width {
                let offset = y * bytesPerRow + x * 4
                let b = Float(pixels[offset])
                let g = Float(pixels[offset + 1])
                let r = Float(pixels[offset + 2])
                // Luminance formula
                grayscale[y * width + x] = 0.299 * r + 0.587 * g + 0.114 * b
            }
        }
    case kCVPixelFormatType_420YpCbCr8BiPlanarFullRange,
         kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange:
        guard let lumaAddress = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0) else {
            return 0
        }
        let lumaBytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0)
        let luma = lumaAddress.assumingMemoryBound(to: UInt8.self)
        for y in 0..<height {
            for x in 0..<width {
                grayscale[y * width + x] = Float(luma[y * lumaBytesPerRow + x])
            }
        }
    default:
        return 0
    }

    // Apply Laplacian kernel: [0, 1, 0], [1, -4, 1], [0, 1, 0] to interior pixels, accumulating
    // the sum and sum of squares so no second full-size buffer is needed.
    var sum = 0.0
    var sumOfSquares = 0.0
    for y in 1..<(height - 1) {
        for x in 1..<(width - 1) {
            let idx = y * width + x
            let neighbours = grayscale[idx - width] + grayscale[idx + width]
                + grayscale[idx - 1] + grayscale[idx + 1]
            let laplacian = Double(neighbours - 4 * grayscale[idx])
            sum += laplacian
            sumOfSquares += laplacian * laplacian
        }
    }

    // Variance over all width*height positions; border positions count as zero response.
    let count = Double(width * height)
    let mean = sum / count
    return max(0, sumOfSquares / count - mean * mean)
}
// MARK: - Gemini API Integration
/// Sends one frame to the Gemini `generateContent` endpoint and returns the ingredients it reports.
///
/// The API key travels in the `x-goog-api-key` request header rather than in the URL, so it
/// does not appear in the request URL and cannot make URL construction fail.
///
/// - Parameters:
///   - pixelBuffer: Frame to analyse; it is JPEG-encoded and sent inline as base64.
///   - existingIngredients: Items already detected; their names are listed in the prompt so
///     the model is asked to report only new items.
/// - Returns: The ingredients parsed from the model's reply.
/// - Throws: `VisionServiceError.apiKeyMissing` when the key is empty or still the placeholder,
///   `VisionServiceError.invalidResponse` for a non-HTTP response,
///   `VisionServiceError.networkError` for an unbuildable URL or a non-2xx status, and any
///   error raised by JPEG conversion, the URL session, or response parsing.
nonisolated private func analyzeFrameWithGemini(
    _ pixelBuffer: CVPixelBuffer,
    existingIngredients: [Ingredient]
) async throws -> [Ingredient] {
    guard !apiKey.isEmpty, apiKey != "INSERT_KEY_HERE" else {
        throw VisionServiceError.apiKeyMissing
    }

    // Convert pixel buffer to base64 JPEG
    let imageData = try convertToJPEG(pixelBuffer)
    let base64Image = imageData.base64EncodedString()

    // Build prompt with existing ingredients for deduplication
    let existingList = existingIngredients.isEmpty
        ? "None yet"
        : existingIngredients.map { $0.name }.joined(separator: ", ")
    let prompt = """
    Analyze this image and identify all food items and ingredients visible.
    ALREADY DETECTED ITEMS (avoid duplicates, merge similar items):
    \(existingList)
    For each NEW item not already listed above, provide:
    1. The item name (normalized - e.g., "milk" not "milk 2%", "whole milk", etc.)
    2. Estimated quantity (numeric with unit, e.g., "2", "500ml", "1 dozen")
    3. Top 3 guesses for what the item might be, with confidence (0.0-1.0)
    IMPORTANT:
    - If you see "milk 2%" and "milk" is already detected, DO NOT include it
    - Use simple, normalized names (e.g., "apple" not "red delicious apple")
    - Quantity should be numeric estimates
    - Only include food items and ingredients, not containers or non-food items
    RESPOND ONLY WITH VALID JSON in this exact format (no markdown):
    {
    "items": [
    {
    "name": "normalized item name",
    "quantity": "2",
    "guesses": [
    {"name": "primary guess", "confidence": 0.95},
    {"name": "second guess", "confidence": 0.7},
    {"name": "third guess", "confidence": 0.3}
    ]
    }
    ]
    }
    If no new food items are visible, return: {"items": []}
    """

    guard let url = URL(string: "https://generativelanguage.googleapis.com/v1beta/models/\(modelName):generateContent") else {
        throw VisionServiceError.networkError(URLError(.badURL))
    }
    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.setValue(apiKey, forHTTPHeaderField: "x-goog-api-key")
    request.timeoutInterval = 30

    let requestBody: [String: Any] = [
        "contents": [
            [
                "parts": [
                    ["text": prompt],
                    [
                        "inline_data": [
                            "mime_type": "image/jpeg",
                            "data": base64Image
                        ]
                    ]
                ]
            ]
        ],
        "generationConfig": [
            "temperature": 0.2,
            "topK": 32,
            "topP": 0.95,
            "maxOutputTokens": 2048
        ]
    ]
    request.httpBody = try JSONSerialization.data(withJSONObject: requestBody)

    let (data, response) = try await URLSession.shared.data(for: request)
    guard let httpResponse = response as? HTTPURLResponse else {
        throw VisionServiceError.invalidResponse
    }
    guard (200...299).contains(httpResponse.statusCode) else {
        print("❌ Gemini API Error: HTTP \(httpResponse.statusCode)")
        if let errorText = String(data: data, encoding: .utf8) {
            print("❌ Error body: \(errorText)")
        }
        throw VisionServiceError.networkError(NSError(domain: "GeminiAPI", code: httpResponse.statusCode))
    }
    return try parseGeminiVisionResponse(data)
}
/// Asks Gemini whether the cooking step shown in a frame is complete.
///
/// Mirrors `analyzeFrameWithGemini`: the key is sent in the `x-goog-api-key` header, the
/// request times out after 30 seconds, and a non-2xx status is reported as an error instead of
/// being handed to the parser.
///
/// - Parameters:
///   - pixelBuffer: Frame to analyse; it is JPEG-encoded and sent inline as base64.
///   - step: Text of the cooking step, embedded verbatim in the prompt.
/// - Returns: The progress parsed from the model's reply.
/// - Throws: `VisionServiceError.apiKeyMissing` when the key is empty or still the placeholder,
///   `VisionServiceError.invalidResponse` for a non-HTTP response,
///   `VisionServiceError.networkError` for an unbuildable URL or a non-2xx status, and any
///   error raised by JPEG conversion, the URL session, or response parsing.
nonisolated private func analyzeCookingStepWithGemini(_ pixelBuffer: CVPixelBuffer, step: String) async throws -> CookingProgress {
    guard !apiKey.isEmpty, apiKey != "INSERT_KEY_HERE" else {
        throw VisionServiceError.apiKeyMissing
    }

    let imageData = try convertToJPEG(pixelBuffer)
    let base64Image = imageData.base64EncodedString()

    let prompt = """
    Analyze this cooking image for the following step:
    "\(step)"
    Determine:
    1. Is this step complete? (true/false)
    2. Confidence level (0.0-1.0)
    3. Brief feedback on the current state
    RESPOND WITH JSON:
    {
    "isComplete": false,
    "confidence": 0.7,
    "feedback": "Brief description of current state"
    }
    """

    guard let url = URL(string: "https://generativelanguage.googleapis.com/v1beta/models/\(modelName):generateContent") else {
        throw VisionServiceError.networkError(URLError(.badURL))
    }
    var request = URLRequest(url: url)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.setValue(apiKey, forHTTPHeaderField: "x-goog-api-key")
    request.timeoutInterval = 30

    let requestBody: [String: Any] = [
        "contents": [
            [
                "parts": [
                    ["text": prompt],
                    [
                        "inline_data": [
                            "mime_type": "image/jpeg",
                            "data": base64Image
                        ]
                    ]
                ]
            ]
        ]
    ]
    request.httpBody = try JSONSerialization.data(withJSONObject: requestBody)

    let (data, response) = try await URLSession.shared.data(for: request)
    guard let httpResponse = response as? HTTPURLResponse else {
        throw VisionServiceError.invalidResponse
    }
    // An error payload has no "candidates", so report the HTTP failure itself rather than
    // letting it surface as a parsing error.
    guard (200...299).contains(httpResponse.statusCode) else {
        print("❌ Gemini API Error: HTTP \(httpResponse.statusCode)")
        if let errorText = String(data: data, encoding: .utf8) {
            print("❌ Error body: \(errorText)")
        }
        throw VisionServiceError.networkError(NSError(domain: "GeminiAPI", code: httpResponse.statusCode))
    }
    return try parseCookingProgressResponse(data)
}
// MARK: - Image Conversion
/// Renders a pixel buffer to JPEG data at compression quality 0.7.
///
/// - Parameter pixelBuffer: Frame to encode.
/// - Returns: The JPEG bytes.
/// - Throws: `VisionServiceError.invalidResponse` when the image cannot be rendered or encoded.
nonisolated private func convertToJPEG(_ pixelBuffer: CVPixelBuffer) throws -> Data {
    let sourceImage = CIImage(cvPixelBuffer: pixelBuffer)
    let renderer = CIContext()

    // Both failure points surface as the same error, so they share one exit.
    if let rendered = renderer.createCGImage(sourceImage, from: sourceImage.extent),
       // Compress to reasonable size for API
       let encoded = UIImage(cgImage: rendered).jpegData(compressionQuality: 0.7) {
        return encoded
    }
    throw VisionServiceError.invalidResponse
}
// MARK: - Response Parsing
/// Extracts the detected ingredients from a raw `generateContent` response body.
///
/// The text of every part of the first candidate is concatenated, markdown code fences are
/// stripped, and anything outside the outermost `{ ... }` is discarded before decoding, so a
/// reply wrapped in prose or fences still parses. Each item's guesses are clamped to 0...1 and
/// ordered by descending confidence; the top guess's confidence becomes the ingredient's
/// confidence (0.5 when the item has no guesses).
///
/// - Parameter data: Response body returned by the API.
/// - Returns: One `Ingredient` per item in the model's reply.
/// - Throws: `VisionServiceError.decodingError` when the envelope has no candidate text, plus
///   any error thrown by `JSONSerialization` or `JSONDecoder`.
nonisolated private func parseGeminiVisionResponse(_ data: Data) throws -> [Ingredient] {
    guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any],
          let candidates = json["candidates"] as? [[String: Any]],
          let content = candidates.first?["content"] as? [String: Any],
          let parts = content["parts"] as? [[String: Any]] else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 0))
    }

    // A reply may be split over several parts; use all of the text, not just the first part.
    let text = parts.compactMap { $0["text"] as? String }.joined()
    guard !text.isEmpty else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 0))
    }

    // Clean up response (remove markdown if present)
    var cleanedText = text
        .replacingOccurrences(of: "```json", with: "")
        .replacingOccurrences(of: "```", with: "")
        .trimmingCharacters(in: .whitespacesAndNewlines)
    // Keep only the outermost JSON object in case the model added text around it.
    if let open = cleanedText.firstIndex(of: "{"),
       let close = cleanedText.lastIndex(of: "}"),
       open < close {
        cleanedText = String(cleanedText[open...close])
    }
    print("📝 Cleaned Gemini response: \(cleanedText)")

    guard let jsonData = cleanedText.data(using: .utf8) else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 1))
    }
    let response = try JSONDecoder().decode(GeminiVisionResponse.self, from: jsonData)

    return response.items.map { item in
        // Rank explicitly instead of trusting the model to list its best guess first.
        let rankedGuesses = item.guesses
            .map { IngredientGuess(name: $0.name, confidence: min(max($0.confidence, 0), 1)) }
            .sorted { $0.confidence > $1.confidence }
        return Ingredient(
            name: item.name,
            estimatedQuantity: item.quantity,
            confidence: rankedGuesses.first?.confidence ?? 0.5,
            guesses: rankedGuesses
        )
    }
}
/// Extracts a `CookingProgress` from a raw `generateContent` response body.
///
/// Reads the text of the first part of the first candidate, strips markdown code fences, and
/// parses the remainder as a JSON object. Missing fields fall back to `false`, `0.5`, and
/// "Processing...".
///
/// - Parameter data: Response body returned by the API.
/// - Throws: `VisionServiceError.decodingError` when the envelope or the embedded JSON does not
///   have the expected shape, plus any error thrown by `JSONSerialization`.
nonisolated private func parseCookingProgressResponse(_ data: Data) throws -> CookingProgress {
    let envelope = try JSONSerialization.jsonObject(with: data) as? [String: Any]
    let candidateList = envelope?["candidates"] as? [[String: Any]]
    let candidateContent = candidateList?.first?["content"] as? [String: Any]
    let contentParts = candidateContent?["parts"] as? [[String: Any]]
    guard let text = contentParts?.first?["text"] as? String else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 0))
    }

    let withoutFences = text
        .replacingOccurrences(of: "```json", with: "")
        .replacingOccurrences(of: "```", with: "")
    let cleanedText = withoutFences.trimmingCharacters(in: .whitespacesAndNewlines)

    guard let jsonData = cleanedText.data(using: .utf8) else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 1))
    }
    guard let progressJson = try JSONSerialization.jsonObject(with: jsonData) as? [String: Any] else {
        throw VisionServiceError.decodingError(NSError(domain: "Parsing", code: 1))
    }

    let isComplete = progressJson["isComplete"] as? Bool ?? false
    let confidence = progressJson["confidence"] as? Double ?? 0.5
    let feedback = progressJson["feedback"] as? String ?? "Processing..."
    return CookingProgress(isComplete: isComplete, confidence: confidence, feedback: feedback)
}
// MARK: - Ingredient Merging
/// Merges newly detected ingredients into an existing list.
///
/// A new item that is similar to an item already in the list (per `isSimilarIngredient` on the
/// lowercased names) is folded into it: the existing id and name are kept, the quantity comes
/// from `mergeQuantities`, the confidence is the higher of the two, and the existing guesses
/// are kept unless there are none, in which case the new item's guesses are used. Anything
/// else is appended.
///
/// Matching walks the list in order, so the item merged into is always the earliest similar
/// one, and the result does not depend on dictionary iteration order.
///
/// - Parameters:
///   - existing: Ingredients collected so far.
///   - new: Ingredients from the latest analysis.
/// - Returns: The merged list, highest confidence first; ties are ordered by lowercased name.
nonisolated private func mergeIngredients(existing: [Ingredient], new: [Ingredient]) -> [Ingredient] {
    var merged = existing

    for newIngredient in new {
        let normalizedName = newIngredient.name.lowercased()
        let matchIndex = merged.firstIndex { candidate in
            isSimilarIngredient(candidate.name.lowercased(), normalizedName)
        }

        if let index = matchIndex {
            let current = merged[index]
            merged[index] = Ingredient(
                id: current.id,
                name: current.name, // Keep original name
                estimatedQuantity: mergeQuantities(current.estimatedQuantity, newIngredient.estimatedQuantity),
                confidence: max(current.confidence, newIngredient.confidence),
                guesses: current.guesses.isEmpty ? newIngredient.guesses : current.guesses
            )
        } else {
            merged.append(newIngredient)
        }
    }

    return merged.sorted { lhs, rhs in
        if lhs.confidence != rhs.confidence {
            return lhs.confidence > rhs.confidence
        }
        // Tie-break so equal-confidence items keep a stable order between calls.
        return lhs.name.lowercased() < rhs.name.lowercased()
    }
}
/// Checks if two ingredient names refer to the same item (e.g., "milk" and "milk 2%").
///
/// Names are expected to be lowercased already. Two names are similar when they are equal,
/// when every word of one appears in the other (allowing simple plural forms such as
/// "tomato"/"tomatoes" or "berry"/"berries"), or when both belong to the same group in the
/// built-in variation table.
///
/// Matching is done on whole words rather than raw substrings, so "egg" does not match
/// "eggplant", and a name with no words never matches a non-empty one.
nonisolated private func isSimilarIngredient(_ name1: String, _ name2: String) -> Bool {
    // Exact match
    if name1 == name2 { return true }

    // Splits a name into words; "%" is kept so "2%" stays a single word.
    func words(in name: String) -> [String] {
        name.split(whereSeparator: { !($0.isLetter || $0.isNumber || $0 == "%") }).map(String.init)
    }

    // True when two words are equal or differ only by a simple plural ending.
    func isSameWord(_ a: String, _ b: String) -> Bool {
        if a == b { return true }
        let (shorter, longer) = a.count <= b.count ? (a, b) : (b, a)
        if longer == shorter + "s" || longer == shorter + "es" { return true }
        if shorter.hasSuffix("y"), longer == String(shorter.dropLast()) + "ies" { return true }
        return false
    }

    // True when every word of `inner` has a counterpart in `outer`.
    func isCovered(_ inner: [String], by outer: [String]) -> Bool {
        !inner.isEmpty && inner.allSatisfy { word in
            outer.contains { isSameWord(word, $0) }
        }
    }

    // One name's words are all contained in the other
    let words1 = words(in: name1)
    let words2 = words(in: name2)
    if isCovered(words1, by: words2) || isCovered(words2, by: words1) { return true }

    // Common ingredient variations
    let variations: [[String]] = [
        ["milk", "whole milk", "2% milk", "skim milk", "milk 2%"],
        ["egg", "eggs", "large eggs"],
        ["butter", "unsalted butter", "salted butter"],
        ["cheese", "cheddar", "cheddar cheese"],
        ["chicken", "chicken breast", "chicken thigh"],
        ["onion", "onions", "yellow onion", "white onion"],
        ["tomato", "tomatoes", "cherry tomatoes"],
        ["potato", "potatoes", "russet potato"]
    ]
    return variations.contains { group in
        group.contains(name1) && group.contains(name2)
    }
}
/// Picks whichever quantity string starts with the larger number.
///
/// A string with no parsable number counts as 0; on a tie the first string wins. Units are
/// not compared.
nonisolated private func mergeQuantities(_ q1: String, _ q2: String) -> String {
    let firstAmount = extractNumber(from: q1) ?? 0
    let secondAmount = extractNumber(from: q2) ?? 0

    guard firstAmount >= secondAmount else {
        return q2
    }
    return q1
}
/// Returns the first decimal number that appears in a string, or `nil` when there is none.
///
/// The pattern requires at least one digit, so punctuation such as the period in
/// "approx. 2 cups" is skipped instead of being taken as the (unparsable) first match.
///
/// - Parameter string: Free-form quantity text such as "500ml" or "1.5 kg".
/// - Returns: The parsed number, e.g. `500` or `1.5`.
nonisolated private func extractNumber(from string: String) -> Double? {
    // Digits with an optional fraction ("2", "1.5"), or a bare fraction (".5").
    let pattern = #"\d+(?:\.\d+)?|\.\d+"#
    guard let regex = try? NSRegularExpression(pattern: pattern),
          let match = regex.firstMatch(in: string, range: NSRange(string.startIndex..., in: string)),
          let range = Range(match.range, in: string) else {
        return nil
    }
    return Double(string[range])
}
}
// MARK: - Response Models
/// Top-level JSON object the ingredient prompt asks the model to return: `{"items": [...]}`.
private struct GeminiVisionResponse: Codable {
/// Items reported by the model; the prompt asks for an empty array when nothing new is visible.
let items: [GeminiVisionItem]
}
/// One detected item in the model's JSON reply.
///
/// Decoding is tolerant of the two deviations most likely to appear in model output: a
/// `quantity` given as a JSON number instead of a string, and a missing `guesses` array.
/// Either would otherwise make decoding of the whole reply fail.
private struct GeminiVisionItem: Codable {
    /// Normalized item name.
    let name: String
    /// Quantity text; empty when the reply carried no usable quantity.
    let quantity: String
    /// Candidate identifications; empty when the reply omitted them.
    let guesses: [GeminiGuess]

    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        name = try container.decode(String.self, forKey: .name)

        if let text = try? container.decode(String.self, forKey: .quantity) {
            quantity = text
        } else if let number = try? container.decode(Double.self, forKey: .quantity) {
            // Render whole numbers without a trailing ".0" (2, not 2.0).
            if let whole = Int(exactly: number) {
                quantity = String(whole)
            } else {
                quantity = String(number)
            }
        } else {
            quantity = ""
        }

        guesses = try container.decodeIfPresent([GeminiGuess].self, forKey: .guesses) ?? []
    }
}
/// One entry of an item's `guesses` array in the model's JSON reply.
private struct GeminiGuess: Codable {
/// Candidate name for the item.
let name: String
/// Confidence for this candidate; the prompt asks for a value between 0.0 and 1.0.
let confidence: Double
}

View File

@@ -19,13 +19,29 @@ final class ScannerViewModel: ObservableObject {
@Published var error: Error? @Published var error: Error?
@Published var scanProgress: String = "Ready to scan" @Published var scanProgress: String = "Ready to scan"
/// The most recently detected new ingredient (for banner display)
@Published var latestNewIngredient: Ingredient?
private let visionService: VisionService private let visionService: VisionService
private let cameraManager: CameraManager private let cameraManager: CameraManager
private var scanTask: Task<Void, Never>? private var scanTask: Task<Void, Never>?
nonisolated init(visionService: VisionService = ARVisionService(), /// Callback when a new ingredient is detected (not a duplicate)
cameraManager: CameraManager = CameraManager()) { var onNewIngredientDetected: ((Ingredient) -> Void)?
nonisolated init(cameraManager: CameraManager = CameraManager()) {
print("📱 ScannerViewModel.init() - Creating ViewModel at \(Date())") print("📱 ScannerViewModel.init() - Creating ViewModel at \(Date())")
// Select vision service based on configuration
let visionService: VisionService = switch AppConfig.scanningMode {
case .geminiVision:
GeminiVisionService()
case .arKit:
ARVisionService()
}
print("📱 ScannerViewModel.init() - Using \(AppConfig.scanningMode.rawValue) scanning mode")
self.visionService = visionService self.visionService = visionService
self.cameraManager = cameraManager self.cameraManager = cameraManager
} }
@@ -62,19 +78,80 @@ final class ScannerViewModel: ObservableObject {
guard !isScanning else { return } guard !isScanning else { return }
isScanning = true isScanning = true
detectedIngredients.removeAll()
scanProgress = "Scanning ingredients..." scanProgress = "Scanning ingredients..."
print("📱 ScannerViewModel.startScanning() - Started with \(AppConfig.scanningMode.rawValue) mode")
scanTask = Task { scanTask = Task {
let startTime = Date()
do { do {
let stream = cameraManager.frameStream() let stream = cameraManager.frameStream()
let ingredients = try await visionService.detectIngredients(from: stream)
// For Gemini mode, we use real-time detection with callbacks
if AppConfig.scanningMode == .geminiVision {
// Process frames continuously until stopped or timeout
var lastProcessTime = Date()
var currentSecondFrames: [(buffer: CVPixelBuffer, timestamp: Date)] = []
for await frame in stream {
guard !Task.isCancelled else { break }
// Check timeout
if Date().timeIntervalSince(startTime) >= AppConfig.maxScanDuration {
print("📱 ScannerViewModel: Max scan duration reached")
break
}
currentSecondFrames.append((buffer: frame, timestamp: Date()))
// Process every second
let now = Date()
if now.timeIntervalSince(lastProcessTime) >= AppConfig.geminiFrameInterval {
// Pick the frame from the middle of the batch (reasonable approximation)
if let bestFrame = currentSecondFrames[safe: currentSecondFrames.count / 2]?.buffer {
do {
let previousCount = detectedIngredients.count
let ingredients = try await visionService.detectIngredients(from: bestFrame)
// Find new ingredients before merging
let newIngredients = findNewIngredients(ingredients)
// Merge with existing
updateDetectedIngredients(ingredients, mergeMode: true)
// Notify about new ingredients
for newIngredient in newIngredients {
print("🆕 New ingredient detected: \(newIngredient.name)")
latestNewIngredient = newIngredient
onNewIngredientDetected?(newIngredient)
}
scanProgress = "Found \(detectedIngredients.count) items..."
} catch {
print("⚠️ Frame analysis error: \(error)")
// Continue scanning on errors
}
}
currentSecondFrames.removeAll()
lastProcessTime = now
}
// Stop if we have enough ingredients
if detectedIngredients.count >= AppConfig.maxIngredientsPerScan {
break
}
}
} else {
// AR mode: use batch detection
let ingredients = try await visionService.detectIngredients(from: stream)
updateDetectedIngredients(ingredients) updateDetectedIngredients(ingredients)
scanProgress = "Scan complete! Found \(ingredients.count) ingredients" }
scanProgress = "Scan complete! Found \(detectedIngredients.count) ingredients"
} catch { } catch {
self.error = error self.error = error
scanProgress = "Scan failed" scanProgress = "Scan failed: \(error.localizedDescription)"
} }
isScanning = false isScanning = false
@@ -123,20 +200,58 @@ final class ScannerViewModel: ObservableObject {
// MARK: - Ingredient Management // MARK: - Ingredient Management
/// Returns the candidates that match nothing already in `detectedIngredients`.
///
/// A candidate is dropped when `isSimilarIngredient` reports it as similar to any existing
/// item; the order of the remaining candidates is preserved.
private func findNewIngredients(_ newIngredients: [Ingredient]) -> [Ingredient] {
    var unseen: [Ingredient] = []
    for candidate in newIngredients {
        let alreadyKnown = detectedIngredients.contains { known in
            isSimilarIngredient(known.name, candidate.name)
        }
        if !alreadyKnown {
            unseen.append(candidate)
        }
    }
    return unseen
}
/// Checks if two ingredient names refer to the same item.
///
/// Comparison is case-insensitive. Two names are similar when they are equal or when every
/// word of one appears in the other, allowing simple plural forms ("tomato"/"tomatoes",
/// "berry"/"berries"). Matching is done on whole words rather than raw substrings, so "egg"
/// does not match "eggplant", and a name with no words never matches a non-empty one.
private func isSimilarIngredient(_ name1: String, _ name2: String) -> Bool {
    let n1 = name1.lowercased()
    let n2 = name2.lowercased()

    // Exact match
    if n1 == n2 { return true }

    // Splits a name into words; "%" is kept so "2%" stays a single word.
    func words(in name: String) -> [String] {
        name.split(whereSeparator: { !($0.isLetter || $0.isNumber || $0 == "%") }).map(String.init)
    }

    // True when two words are equal or differ only by a simple plural ending.
    func isSameWord(_ a: String, _ b: String) -> Bool {
        if a == b { return true }
        let (shorter, longer) = a.count <= b.count ? (a, b) : (b, a)
        if longer == shorter + "s" || longer == shorter + "es" { return true }
        if shorter.hasSuffix("y"), longer == String(shorter.dropLast()) + "ies" { return true }
        return false
    }

    // True when every word of `inner` has a counterpart in `outer`.
    func isCovered(_ inner: [String], by outer: [String]) -> Bool {
        !inner.isEmpty && inner.allSatisfy { word in
            outer.contains { isSameWord(word, $0) }
        }
    }

    // One name's words are all contained in the other
    let words1 = words(in: n1)
    let words2 = words(in: n2)
    return isCovered(words1, by: words2) || isCovered(words2, by: words1)
}
private func updateDetectedIngredients(_ newIngredients: [Ingredient], mergeMode: Bool = false) { private func updateDetectedIngredients(_ newIngredients: [Ingredient], mergeMode: Bool = false) {
if mergeMode { if mergeMode {
// Merge with existing ingredients, keeping higher confidence // Merge with existing ingredients, keeping higher confidence and max quantity
var merged = detectedIngredients.reduce(into: [String: Ingredient]()) { dict, ingredient in var merged = detectedIngredients.reduce(into: [String: Ingredient]()) { dict, ingredient in
dict[ingredient.name] = ingredient dict[ingredient.name.lowercased()] = ingredient
} }
for ingredient in newIngredients { for ingredient in newIngredients {
if let existing = merged[ingredient.name] { let normalizedName = ingredient.name.lowercased()
if ingredient.confidence > existing.confidence {
merged[ingredient.name] = ingredient // Check for similar existing items
let similarKey = merged.keys.first { existingKey in
isSimilarIngredient(existingKey, normalizedName)
} }
if let key = similarKey, let existing = merged[key] {
// Merge: take max quantity, higher confidence
let mergedQuantity = mergeQuantities(existing.estimatedQuantity, ingredient.estimatedQuantity)
let mergedConfidence = max(existing.confidence, ingredient.confidence)
merged[key] = Ingredient(
id: existing.id,
name: existing.name,
estimatedQuantity: mergedQuantity,
confidence: mergedConfidence,
guesses: existing.guesses.isEmpty ? ingredient.guesses : existing.guesses
)
} else { } else {
merged[ingredient.name] = ingredient merged[normalizedName] = ingredient
} }
} }
@@ -146,6 +261,23 @@ final class ScannerViewModel: ObservableObject {
} }
} }
/// Picks whichever quantity string starts with the larger number.
///
/// A string with no parsable number counts as 0; on a tie the first string wins. Units are
/// not compared.
private func mergeQuantities(_ q1: String, _ q2: String) -> String {
    let firstAmount = extractNumber(from: q1) ?? 0
    let secondAmount = extractNumber(from: q2) ?? 0

    guard firstAmount >= secondAmount else {
        return q2
    }
    return q1
}
/// Returns the first decimal number that appears in a string, or `nil` when there is none.
///
/// The pattern requires at least one digit, so punctuation such as the period in
/// "approx. 2 cups" is skipped instead of being taken as the (unparsable) first match.
///
/// - Parameter string: Free-form quantity text such as "500ml" or "1.5 kg".
/// - Returns: The parsed number, e.g. `500` or `1.5`.
private func extractNumber(from string: String) -> Double? {
    // Digits with an optional fraction ("2", "1.5"), or a bare fraction (".5").
    let pattern = #"\d+(?:\.\d+)?|\.\d+"#
    guard let regex = try? NSRegularExpression(pattern: pattern),
          let match = regex.firstMatch(in: string, range: NSRange(string.startIndex..., in: string)),
          let range = Range(match.range, in: string) else {
        return nil
    }
    return Double(string[range])
}
func addIngredient(_ ingredient: Ingredient) { func addIngredient(_ ingredient: Ingredient) {
if !detectedIngredients.contains(where: { $0.id == ingredient.id }) { if !detectedIngredients.contains(where: { $0.id == ingredient.id }) {
detectedIngredients.append(ingredient) detectedIngredients.append(ingredient)
@@ -181,4 +313,52 @@ final class ScannerViewModel: ObservableObject {
await cameraManager.cleanup() await cameraManager.cleanup()
print("📱 ScannerViewModel.cleanup() - ✅ Cleanup complete") print("📱 ScannerViewModel.cleanup() - ✅ Cleanup complete")
} }
// MARK: - Local Persistence
/// Persists `detectedIngredients` as JSON in `UserDefaults` under the key "savedIngredients".
///
/// An encoding failure is logged and nothing is written.
/// TODO: Migrate to FirestoreRepository when Firebase is configured
/// To migrate: Replace this method with a call to FirestoreRepository.saveIngredients()
func saveIngredientsLocally() {
    let encoded: Data
    do {
        encoded = try JSONEncoder().encode(detectedIngredients)
    } catch {
        print("❌ Failed to save ingredients: \(error)")
        return
    }

    UserDefaults.standard.set(encoded, forKey: "savedIngredients")
    print("💾 Saved \(detectedIngredients.count) ingredients locally")
}
/// Restores `detectedIngredients` from the JSON stored in `UserDefaults` under "savedIngredients".
///
/// When nothing is stored, or the stored data cannot be decoded, the current list is left
/// untouched and a message is logged.
/// TODO: Migrate to FirestoreRepository when Firebase is configured
/// To migrate: Replace this method with a call to FirestoreRepository.loadIngredients()
func loadIngredientsLocally() {
    if let stored = UserDefaults.standard.data(forKey: "savedIngredients") {
        do {
            let restored = try JSONDecoder().decode([Ingredient].self, from: stored)
            detectedIngredients = restored
            print("📂 Loaded \(detectedIngredients.count) ingredients from local storage")
        } catch {
            print("❌ Failed to load ingredients: \(error)")
        }
    } else {
        print("📂 No saved ingredients found")
    }
}
/// Empties the in-memory `detectedIngredients` list and removes the persisted copy stored in
/// `UserDefaults` under the key "savedIngredients".
func clearSavedIngredients() {
detectedIngredients.removeAll()
UserDefaults.standard.removeObject(forKey: "savedIngredients")
print("🗑️ Cleared all saved ingredients")
}
}
// MARK: - Array Safe Subscript Extension
extension Collection {
/// Returns the element at the specified index if it exists, otherwise nil.
subscript(safe index: Index) -> Element? {
indices.contains(index) ? self[index] : nil
}
} }

View File

@@ -8,6 +8,7 @@
import SwiftUI import SwiftUI
import ARKit import ARKit
import RealityKit import RealityKit
import AVFoundation
struct ScannerView: View { struct ScannerView: View {
@StateObject private var viewModel = ScannerViewModel() @StateObject private var viewModel = ScannerViewModel()
@@ -18,6 +19,11 @@ struct ScannerView: View {
@State private var showARView = false @State private var showARView = false
@State private var previewLayer: AVCaptureVideoPreviewLayer? @State private var previewLayer: AVCaptureVideoPreviewLayer?
// Banner notification state
@State private var showBanner = false
@State private var bannerIngredient: Ingredient?
@State private var bannerTask: Task<Void, Never>?
init() { init() {
print("🔵 ScannerView.init() - View initialized at \(Date())") print("🔵 ScannerView.init() - View initialized at \(Date())")
} }
@@ -49,6 +55,14 @@ struct ScannerView: View {
// Overlay UI // Overlay UI
VStack { VStack {
// New ingredient banner (top of screen)
if showBanner, let ingredient = bannerIngredient {
NewIngredientBanner(ingredient: ingredient)
.transition(.move(edge: .top).combined(with: .opacity))
.padding(.horizontal)
.padding(.top, 8)
}
// Top status bar // Top status bar
statusBar statusBar
.padding() .padding()
@@ -71,7 +85,7 @@ struct ScannerView: View {
.padding() .padding()
} }
} }
.navigationTitle(showARView ? "AR Scanner" : "Camera Preview") .navigationTitle(showARView ? "AR Scanner" : "Ingredient Scanner")
.navigationBarTitleDisplayMode(.inline) .navigationBarTitleDisplayMode(.inline)
.toolbar { .toolbar {
ToolbarItem(placement: .navigationBarTrailing) { ToolbarItem(placement: .navigationBarTrailing) {
@@ -84,6 +98,15 @@ struct ScannerView: View {
} }
.task { .task {
print("🔵 ScannerView.task - Task started at \(Date())") print("🔵 ScannerView.task - Task started at \(Date())")
// Load any previously saved ingredients
viewModel.loadIngredientsLocally()
// Setup new ingredient notification handler
viewModel.onNewIngredientDetected = { [self] ingredient in
showNewIngredientBanner(ingredient)
}
if !showARView { if !showARView {
print("🔵 ScannerView.task - Calling setupCamera()") print("🔵 ScannerView.task - Calling setupCamera()")
await viewModel.setupCamera() await viewModel.setupCamera()
@@ -96,10 +119,17 @@ struct ScannerView: View {
} }
.onDisappear { .onDisappear {
print("🔵 ScannerView.onDisappear - Cleaning up at \(Date())") print("🔵 ScannerView.onDisappear - Cleaning up at \(Date())")
bannerTask?.cancel()
Task { Task {
await viewModel.cleanup() await viewModel.cleanup()
} }
} }
.onChange(of: viewModel.isScanning) { wasScanning, isScanning in
// When scanning stops, save ingredients and optionally navigate
if wasScanning && !isScanning && !viewModel.detectedIngredients.isEmpty {
viewModel.saveIngredientsLocally()
}
}
.alert("Camera Error", isPresented: .constant(viewModel.error != nil)) { .alert("Camera Error", isPresented: .constant(viewModel.error != nil)) {
Button("OK") { Button("OK") {
viewModel.error = nil viewModel.error = nil
@@ -120,6 +150,28 @@ struct ScannerView: View {
} }
} }
// MARK: - Banner Management
/// Shows the banner for a newly detected ingredient and schedules its dismissal one second later.
///
/// Any dismissal already pending is cancelled first, so a banner shown in quick succession
/// gets its own full second on screen.
private func showNewIngredientBanner(_ ingredient: Ingredient) {
    // Drop the previous auto-dismiss, if one is still waiting.
    bannerTask?.cancel()

    withAnimation(.spring(response: 0.3)) {
        bannerIngredient = ingredient
        showBanner = true
    }

    bannerTask = Task { @MainActor in
        // The sleep only fails when this task is cancelled, i.e. a newer banner took over.
        do {
            try await Task.sleep(for: .seconds(1))
        } catch {
            return
        }
        if Task.isCancelled {
            return
        }
        withAnimation(.easeOut(duration: 0.3)) {
            showBanner = false
        }
    }
}
// MARK: - UI Components // MARK: - UI Components
private var statusBar: some View { private var statusBar: some View {
@@ -353,6 +405,58 @@ struct ManualIngredientEntry: View {
} }
} }
// MARK: - New Ingredient Banner
/// Green banner announcing a newly detected ingredient: name, optional quantity, and a
/// confidence badge.
struct NewIngredientBanner: View {
    /// Ingredient to announce.
    let ingredient: Ingredient

    /// Confidence as a whole percentage limited to 0...100.
    ///
    /// Converting a non-finite `Double` to `Int` traps, and a value outside 0...1 would render
    /// as a nonsensical percentage, so the value is validated and clamped before conversion.
    private var confidencePercent: Int {
        guard ingredient.confidence.isFinite else { return 0 }
        return Int(min(max(ingredient.confidence, 0), 1) * 100)
    }

    var body: some View {
        HStack(spacing: 12) {
            Image(systemName: "plus.circle.fill")
                .foregroundStyle(.white)
                .font(.title2)

            VStack(alignment: .leading, spacing: 2) {
                Text("New Item Detected")
                    .font(.caption)
                    .foregroundStyle(.white.opacity(0.8))
                Text(ingredient.name.capitalized)
                    .font(.headline)
                    .foregroundStyle(.white)
                // The quantity line is omitted entirely when no quantity is known.
                if !ingredient.estimatedQuantity.isEmpty {
                    Text("Qty: \(ingredient.estimatedQuantity)")
                        .font(.caption)
                        .foregroundStyle(.white.opacity(0.8))
                }
            }

            Spacer()

            // Confidence indicator
            Text("\(confidencePercent)%")
                .font(.caption)
                .fontWeight(.semibold)
                .foregroundStyle(.white)
                .padding(.horizontal, 8)
                .padding(.vertical, 4)
                .background(.white.opacity(0.2))
                .clipShape(Capsule())
        }
        .padding()
        .background(
            LinearGradient(
                colors: [Color.green, Color.green.opacity(0.8)],
                startPoint: .leading,
                endPoint: .trailing
            )
        )
        .clipShape(RoundedRectangle(cornerRadius: 12))
        .shadow(color: .black.opacity(0.2), radius: 8, y: 4)
    }
}
#Preview { #Preview {
ScannerView() ScannerView()
} }