"""Provider-agnostic LLM client: prefers Anthropic Claude, falls back to Gemini.

Every public coroutine builds a prompt, delegates to ``_text_completion`` /
``_vision_completion`` for the configured provider, and parses the model's
JSON reply with ``_parse_json``.
"""

import base64
import json
import logging
import re
from datetime import datetime

from app.config import settings

logger = logging.getLogger(__name__)

# ── Provider setup: prefer Anthropic, fall back to Gemini ──
_provider: str | None = None

if settings.ANTHROPIC_API_KEY:
    import anthropic

    _anthropic_client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
    _provider = "anthropic"
    _model = "claude-sonnet-4-20250514"
    logger.info("LLM provider: Anthropic (Claude)")
elif settings.GEMINI_API_KEY:
    from google import genai
    from google.genai import types as genai_types

    _gemini_client = genai.Client(api_key=settings.GEMINI_API_KEY)
    _provider = "gemini"
    _model = "gemini-3.1-pro-preview"
    logger.info("LLM provider: Google (Gemini)")


def _parse_json(text: str) -> dict | list:
    """Extract and parse the JSON object/array embedded in an LLM reply.

    Tolerates markdown code fences, leading/trailing prose, ``//`` comments,
    and trailing commas (Gemini sometimes emits the latter two).

    Raises:
        ValueError: if no JSON-looking span is found in *text*.
        json.JSONDecodeError: if the extracted span still fails to parse.
    """
    text = text.strip()
    # Strip markdown code fences (``` or ```json ... ```).
    if text.startswith("```"):
        text = text.split("\n", 1)[1]
        text = text.rsplit("```", 1)[0]
    # Locate the outermost JSON span: first { or [ through last } or ].
    start = next((i for i, c in enumerate(text) if c in "{["), -1)
    if start == -1:
        raise ValueError(f"No JSON found in LLM response: {text[:200]}")
    end = max(text.rfind("}"), text.rfind("]"))
    if end == -1:
        raise ValueError(f"No closing bracket in LLM response: {text[:200]}")
    json_str = text[start:end + 1]
    # Fast path: valid JSON needs no repair. This also protects string values
    # that contain "//" (e.g. URLs) — the previous code always ran the
    # comment-stripping regex and would truncate {"url": "https://x.com"}.
    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        pass
    # Repair path: strip // comments (Gemini sometimes adds these) and
    # trailing commas before } or ], then retry.
    json_str = re.sub(r'//[^\n]*', '', json_str)
    json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
    return json.loads(json_str)


def _check_provider() -> None:
    """Raise RuntimeError if no provider was configured at import time."""
    if not _provider:
        raise RuntimeError(
            "No LLM API key configured. Set ANTHROPIC_API_KEY or GEMINI_API_KEY in .env"
        )


async def _text_completion(system: str, user_content: str, max_tokens: int = 1024) -> str:
    """Run a text-only completion against the configured provider.

    Args:
        system: system/instruction prompt.
        user_content: user message body.
        max_tokens: response cap (honored by Anthropic only).

    Returns:
        The model's raw text reply.
    """
    _check_provider()
    if _provider == "anthropic":
        # Anthropic has no separate system slot here; prepend it to the user turn.
        # NOTE(review): this SDK call is synchronous inside an async def and
        # blocks the event loop — consider anthropic.AsyncAnthropic.
        response = _anthropic_client.messages.create(
            model=_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": f"{system}\n\n{user_content}"}],
        )
        return response.content[0].text
    else:
        # NOTE(review): also a blocking call inside async — see above.
        response = _gemini_client.models.generate_content(
            model=_model,
            config={"system_instruction": system},
            contents=user_content,
        )
        return response.text


async def _vision_completion(system: str, image_bytes: bytes, user_text: str, max_tokens: int = 512) -> str:
    """Run an image + text completion against the configured provider.

    Args:
        system: system/instruction prompt.
        image_bytes: raw JPEG bytes of the screenshot.
        user_text: accompanying user message.
        max_tokens: response cap (honored by Anthropic only).

    Returns:
        The model's raw text reply.
    """
    _check_provider()
    if _provider == "anthropic":
        image_b64 = base64.b64encode(image_bytes).decode()
        # NOTE(review): blocking SDK call inside async — see _text_completion.
        response = _anthropic_client.messages.create(
            model=_model,
            max_tokens=max_tokens,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
                    {"type": "text", "text": f"{system}\n\n{user_text}"},
                ],
            }],
        )
        return response.content[0].text
    else:
        response = _gemini_client.models.generate_content(
            model=_model,
            config={"system_instruction": system},
            contents=[
                genai_types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
                user_text,
            ],
        )
        return response.text


# ── Public API (unchanged signatures) ──


async def parse_brain_dump(raw_text: str, timezone: str) -> dict:
    """Extract structured tasks + ADHD-friendly steps from a free-form brain dump.

    Returns the parsed JSON dict with "parsed_tasks" and "unparseable_fragments".
    """
    system = f"""You are a task parser and ADHD-friendly planner. Extract structured tasks from this brain dump, then break each task into concrete, actionable steps someone with ADHD can start immediately.

Today's date: {datetime.now().strftime("%Y-%m-%d")}
User's timezone: {timezone}

Task extraction rules:
- Be generous with deadlines — infer from context.
- If no deadline is obvious, set priority to 0 (unset).
- Unrelated items stay as separate top-level tasks.

Step rules (applied to every task's subtasks array):
- Each step should be 5-15 minutes, specific enough to start without decision paralysis.
- First step should be the EASIEST to reduce activation energy.
- Steps explicitly mentioned in the brain dump have "suggested": false.
- Then ADD 1-3 additional steps the user likely needs but didn't mention, with "suggested": true. Examples: "gather materials", "review before sending", "set a reminder", "test it works".
- Keep step titles short and action-oriented.
- Every task should have at least 2 steps total.

Respond ONLY with JSON, no other text. Example:
{{
  "parsed_tasks": [{{
    "title": "concise task title",
    "description": "any extra detail from the dump",
    "deadline": "ISO 8601 or null",
    "priority": "0-4 integer (0=unset, 1=low, 2=med, 3=high, 4=urgent)",
    "estimated_minutes": "total for all steps or null",
    "tags": ["work", "personal", "health", "errands", etc.],
    "subtasks": [
      {{"title": "step from the dump", "description": null, "deadline": null, "estimated_minutes": 10, "suggested": false}},
      {{"title": "AI-suggested next step", "description": null, "deadline": null, "estimated_minutes": 5, "suggested": true}}
    ]
  }}],
  "unparseable_fragments": ["text that couldn't be parsed into tasks"]
}}"""
    text = await _text_completion(system, f"Brain dump:\n{raw_text}", max_tokens=2048)
    return _parse_json(text)


async def generate_step_plan(task_title: str, task_description: str | None, estimated_minutes: int | None) -> list:
    """Break a single task into 5-15 minute steps; returns the parsed JSON list."""
    est = f"{estimated_minutes} minutes" if estimated_minutes else "unknown"
    system = f"""You are an ADHD-friendly task planner. Break this task into concrete steps of 5-15 minutes each. Each step should be specific enough that someone with ADHD can start immediately without decision paralysis.

Rules:
- First step should be the EASIEST (reduce activation energy)
- Steps should be independently completable
- Include time estimates per step
- Total estimated time should roughly match the task estimate
- No step longer than 15 minutes

Respond ONLY with JSON array:
[{{
  "sort_order": 1,
  "title": "specific action description",
  "description": "additional detail if needed",
  "estimated_minutes": number
}}]"""
    text = await _text_completion(
        system,
        f"Task: {task_title}\nDescription: {task_description or 'N/A'}\nEstimated total: {est}",
    )
    return _parse_json(text)


async def analyze_screenshot(
    screenshot_bytes: bytes,
    window_title: str,
    task_context: dict,
    recent_summaries: list[str] | None = None,
) -> dict:
    """Legacy server-side VLM analysis. Upgraded with friction detection prompt.

    Args:
        screenshot_bytes: JPEG screenshot of the user's screen.
        window_title: OS-reported foreground window title.
        task_context: dict with "task_title", "task_goal", and "steps"
            (each step: id, sort_order, status, title, optional checkpoint_note).
        recent_summaries: most-recent-first one-line summaries of prior
            screenshots, assumed 5 seconds apart.

    Returns:
        Parsed JSON dict describing task status, friction, intent, etc.
    """
    steps_text = ""
    for s in task_context.get("steps", []):
        cp = f' checkpoint_note="{s["checkpoint_note"]}"' if s.get("checkpoint_note") else ""
        steps_text += f' - [{s["status"]}] {s["sort_order"]}. {s["title"]} (id={s["id"]}){cp}\n'
    history_text = ""
    if recent_summaries:
        for i, summary in enumerate(recent_summaries):
            # Oldest entry is furthest back; entries are ~5s apart.
            history_text += f" - [{(len(recent_summaries) - i) * 5}s ago] {summary}\n"
    # chr(10) is a newline — kept out of the f-string expression for
    # compatibility with pre-3.12 f-string backslash rules.
    system = f"""You are a proactive focus assistant analyzing a user's screen.

The user's current task and step progress:
Task: {task_context.get("task_title", "")}
Goal: {task_context.get("task_goal", "")}
Steps:
{steps_text}
Window title reported by OS: {window_title}
{"Recent screen history:" + chr(10) + history_text if history_text else ""}
Analyze the current screenshot. Determine:
1. TASK STATUS: Is the user working on their task? Which step? Any steps completed?
2. CHECKPOINT: What specific within-step progress have they made?
3. FRICTION DETECTION: Is the user stuck in any of these patterns?
   - REPETITIVE_LOOP: Switching between same 2-3 windows (copying data manually)
   - STALLED: Same screen region with minimal changes for extended time
   - TEDIOUS_MANUAL: Doing automatable work (filling forms, organizing files, transcribing)
   - CONTEXT_OVERHEAD: Many windows open, visibly searching across them
   - TASK_RESUMPTION: User just returned to a task they were working on earlier
4. INTENT: If viewing informational content, is the user SKIMMING, ENGAGED, or UNCLEAR?
5. PROPOSED ACTION: If friction detected, suggest a specific action the AI could take.

Respond ONLY with JSON:
{{
  "on_task": boolean,
  "current_step_id": "step UUID or null",
  "checkpoint_note_update": "within-step progress or null",
  "steps_completed": ["UUIDs"],
  "friction": {{
    "type": "repetitive_loop | stalled | tedious_manual | context_overhead | task_resumption | none",
    "confidence": 0.0-1.0,
    "description": "what the user is struggling with or null",
    "proposed_actions": [
      {{"label": "action description", "action_type": "auto_extract | brain_dump", "details": "specifics"}}
    ],
    "source_context": "what info to extract from or null",
    "target_context": "where to put it or null"
  }},
  "intent": "skimming | engaged | unclear | null",
  "distraction_type": "app_switch | browsing | idle | null",
  "app_name": "primary visible application",
  "confidence": 0.0-1.0,
  "gentle_nudge": "nudge if distracted and no friction action applies, null otherwise",
  "vlm_summary": "1-sentence factual description of screen"
}}"""
    text = await _vision_completion(system, screenshot_bytes, "Analyze this screenshot.")
    return _parse_json(text)


async def generate_resume_card(
    task_title: str,
    goal: str | None,
    current_step_title: str | None,
    checkpoint_note: str | None,
    completed_count: int,
    total_count: int,
    next_step_title: str | None,
    minutes_away: int,
    attention_score: int | None,
) -> dict:
    """Generate a warm, checkpoint-specific "welcome back" card as a JSON dict."""
    system = """Generate a brief, encouraging context-resume card for someone with ADHD returning to their task. Be warm, specific, and action-oriented.

No shame. No generic platitudes. Use the checkpoint_note to give hyper-specific context about where they left off.

Respond ONLY with JSON:
{
  "welcome_back": "short friendly greeting (max 8 words)",
  "you_were_doing": "1 sentence referencing checkpoint_note specifically",
  "next_step": "concrete next action with time estimate",
  "motivation": "1 sentence encouragement (ADHD-friendly, no shame)"
}"""
    user_content = f"""Inputs:
- Task: {task_title}
- Overall goal: {goal or "N/A"}
- Current step: {current_step_title or "N/A"}
- Current step checkpoint_note: {checkpoint_note or "N/A"}
- Steps completed: {completed_count} of {total_count}
- Next step after current: {next_step_title or "N/A"}
- Time away: {minutes_away} minutes
- Attention score before leaving: {attention_score or "N/A"}"""
    text = await _text_completion(system, user_content, max_tokens=256)
    return _parse_json(text)


async def generate_app_activity_nudge(
    app_name: str,
    duration_seconds: int,
    task_title: str,
    current_step_title: str | None,
    checkpoint_note: str | None,
) -> str:
    """Generate a single gentle nudge (plain text, no JSON) for an off-task drift."""
    minutes = duration_seconds // 60
    duration_text = (
        f"{minutes} minute{'s' if minutes != 1 else ''}"
        if minutes > 0
        else f"{duration_seconds} seconds"
    )
    system = """Generate a single gentle, non-judgmental nudge for someone with ADHD who drifted to a non-work app during a focus session. Reference their specific progress to make returning easier. No shame. Keep it under 30 words.

Respond with ONLY the nudge text, no JSON, no quotes."""
    user_content = f"""Context:
- Distraction app: {app_name}
- Time spent: {duration_text}
- Current task: {task_title}
- Current step: {current_step_title or "N/A"}
- Progress so far: {checkpoint_note or "N/A"}"""
    return (await _text_completion(system, user_content, max_tokens=100)).strip()


async def suggest_work_apps(task_title: str, task_description: str | None) -> dict:
    """Suggest the single most likely Apple app (scheme + name) for a task."""
    system = """Given this task, suggest which Apple apps the user likely needs. Return the most likely single app as the primary suggestion.

Respond ONLY with JSON:
{
  "suggested_app_scheme": "URL scheme (e.g. mobilenotes://, x-apple-pages://, com.google.docs://)",
  "suggested_app_name": "human-readable name (e.g. Notes, Pages, Google Docs)"
}"""
    text = await _text_completion(
        system,
        f"Task: {task_title}\nDescription: {task_description or 'N/A'}",
        max_tokens=100,
    )
    return _parse_json(text)


async def prioritize_tasks(tasks_json: list, timezone: str) -> list:
    """Rank tasks ADHD-style; returns a JSON list of per-task priority advice."""
    system = """You are an ADHD-friendly task prioritizer. Consider: deadlines, estimated effort, task dependencies, and the user's energy patterns.

Rules:
- Hard deadlines always take top priority
- Front-load quick wins (<15min) for momentum
- Group errands together
- Deprioritize tasks with no deadline and low urgency

Respond ONLY with JSON array:
[{
  "task_id": "uuid",
  "recommended_priority": 1-4,
  "reason": "1-sentence explanation"
}]"""
    user_content = f"""Input: {json.dumps(tasks_json)}
Current time: {datetime.now().isoformat()}
User's timezone: {timezone}"""
    text = await _text_completion(system, user_content, max_tokens=512)
    return _parse_json(text)