API
This commit is contained in:
351
app/services/llm.py
Normal file
351
app/services/llm.py
Normal file
@@ -0,0 +1,351 @@
|
||||
import asyncio
import base64
import json
import logging

from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Provider setup: prefer Anthropic, fall back to Gemini ──
#
# Exactly one provider is initialized at import time based on which API key
# is present in settings. If neither key is set, _provider stays None and
# _check_provider() raises when a completion is first requested.

_provider: str | None = None

if settings.ANTHROPIC_API_KEY:
    # Imported lazily so the anthropic package is only required when used.
    import anthropic
    _anthropic_client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
    _provider = "anthropic"
    # Pinned model snapshot for reproducible behavior.
    _model = "claude-sonnet-4-20250514"
    logger.info("LLM provider: Anthropic (Claude)")

elif settings.GEMINI_API_KEY:
    # google-genai SDK; genai_types is used by _vision_completion for
    # building image Parts.
    from google import genai
    from google.genai import types as genai_types
    _gemini_client = genai.Client(api_key=settings.GEMINI_API_KEY)
    _provider = "gemini"
    _model = "gemini-3.1-pro-preview"
    logger.info("LLM provider: Google (Gemini)")
|
||||
|
||||
|
||||
def _parse_json(text: str) -> dict | list:
|
||||
import re
|
||||
text = text.strip()
|
||||
# Strip markdown code fences
|
||||
if text.startswith("```"):
|
||||
text = text.split("\n", 1)[1]
|
||||
text = text.rsplit("```", 1)[0]
|
||||
# Find the first { or [ and last } or ]
|
||||
start = -1
|
||||
for i, c in enumerate(text):
|
||||
if c in "{[":
|
||||
start = i
|
||||
break
|
||||
if start == -1:
|
||||
raise ValueError(f"No JSON found in LLM response: {text[:200]}")
|
||||
end = max(text.rfind("}"), text.rfind("]"))
|
||||
if end == -1:
|
||||
raise ValueError(f"No closing bracket in LLM response: {text[:200]}")
|
||||
json_str = text[start:end + 1]
|
||||
# Strip // comments (Gemini sometimes adds these)
|
||||
json_str = re.sub(r'//[^\n]*', '', json_str)
|
||||
# Strip trailing commas before } or ]
|
||||
json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
|
||||
return json.loads(json_str)
|
||||
|
||||
|
||||
def _check_provider():
    """Fail fast when module import found no usable LLM API key.

    Raises:
        RuntimeError: If neither Anthropic nor Gemini was configured.
    """
    if _provider is None:
        raise RuntimeError("No LLM API key configured. Set ANTHROPIC_API_KEY or GEMINI_API_KEY in .env")
|
||||
|
||||
|
||||
async def _text_completion(system: str, user_content: str, max_tokens: int = 1024) -> str:
    """Run a text-only completion against the active provider.

    Args:
        system: System / instruction prompt.
        user_content: User message body.
        max_tokens: Response cap (applied on the Anthropic path only).

    Returns:
        The model's text response.

    Raises:
        RuntimeError: If no provider is configured.
    """
    _check_provider()
    if _provider == "anthropic":
        # The SDK client here is synchronous; run the blocking HTTP call
        # in a worker thread so it doesn't stall the event loop.
        response = await asyncio.to_thread(
            _anthropic_client.messages.create,
            model=_model,
            max_tokens=max_tokens,
            # The system prompt is folded into the user turn for parity
            # with the Gemini path's single-string contents.
            messages=[{"role": "user", "content": f"{system}\n\n{user_content}"}],
        )
        return response.content[0].text
    else:
        # generate_content is likewise a blocking call — offload it.
        response = await asyncio.to_thread(
            _gemini_client.models.generate_content,
            model=_model,
            config={"system_instruction": system},
            contents=user_content,
        )
        return response.text
|
||||
|
||||
|
||||
async def _vision_completion(system: str, image_bytes: bytes, user_text: str, max_tokens: int = 512) -> str:
    """Run an image + text completion against the active provider.

    Args:
        system: System / instruction prompt.
        image_bytes: Raw image bytes; sent with a JPEG media type, so the
            caller is expected to provide JPEG data.
        user_text: Text accompanying the image.
        max_tokens: Response cap (applied on the Anthropic path only).

    Returns:
        The model's text response.

    Raises:
        RuntimeError: If no provider is configured.
    """
    _check_provider()
    if _provider == "anthropic":
        # Anthropic expects base64-encoded image data.
        image_b64 = base64.b64encode(image_bytes).decode()
        # Synchronous SDK call — offload to a worker thread so the
        # event loop stays responsive.
        response = await asyncio.to_thread(
            _anthropic_client.messages.create,
            model=_model,
            max_tokens=max_tokens,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
                    {"type": "text", "text": f"{system}\n\n{user_text}"},
                ],
            }],
        )
        return response.content[0].text
    else:
        # Gemini accepts raw bytes directly via Part.from_bytes; the call
        # is still blocking, so offload it as well.
        response = await asyncio.to_thread(
            _gemini_client.models.generate_content,
            model=_model,
            config={"system_instruction": system},
            contents=[
                genai_types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
                user_text,
            ],
        )
        return response.text
|
||||
|
||||
|
||||
# ── Public API (unchanged signatures) ──
|
||||
|
||||
|
||||
async def parse_brain_dump(raw_text: str, timezone: str) -> dict:
    """Parse a free-form brain dump into structured tasks with subtasks.

    Args:
        raw_text: The user's unstructured text.
        timezone: User's timezone name (presumably IANA, e.g.
            "America/New_York") used for deadline inference and to compute
            "today" from the user's perspective.

    Returns:
        Dict with "parsed_tasks" (list of task dicts, each carrying a
        "subtasks" array) and "unparseable_fragments" (list of strings),
        as produced by the model and decoded by _parse_json.
    """
    from datetime import datetime

    # Compute "today" in the *user's* timezone — the server's local date
    # can differ from the user's around midnight, which would skew every
    # inferred deadline. Fall back to server time for unknown zone names.
    try:
        from zoneinfo import ZoneInfo
        today = datetime.now(ZoneInfo(timezone)).strftime("%Y-%m-%d")
    except Exception:
        today = datetime.now().strftime("%Y-%m-%d")

    system = f"""You are a task parser and ADHD-friendly planner.
Extract structured tasks from this brain dump, then break each task into
concrete, actionable steps someone with ADHD can start immediately.

Today's date: {today}
User's timezone: {timezone}

Task extraction rules:
- Be generous with deadlines — infer from context.
- If no deadline is obvious, set priority to 0 (unset).
- Unrelated items stay as separate top-level tasks.

Step rules (applied to every task's subtasks array):
- Each step should be 5-15 minutes, specific enough to start without decision paralysis.
- First step should be the EASIEST to reduce activation energy.
- Steps explicitly mentioned in the brain dump have "suggested": false.
- Then ADD 1-3 additional steps the user likely needs but didn't mention, with "suggested": true.
Examples: "gather materials", "review before sending", "set a reminder", "test it works".
- Keep step titles short and action-oriented.
- Every task should have at least 2 steps total.

Respond ONLY with JSON, no other text.
Example:
{{
"parsed_tasks": [{{
"title": "concise task title",
"description": "any extra detail from the dump",
"deadline": "ISO 8601 or null",
"priority": "0-4 integer (0=unset, 1=low, 2=med, 3=high, 4=urgent)",
"estimated_minutes": "total for all steps or null",
"tags": ["work", "personal", "health", "errands", etc.],
"subtasks": [
{{"title": "step from the dump", "description": null, "deadline": null, "estimated_minutes": 10, "suggested": false}},
{{"title": "AI-suggested next step", "description": null, "deadline": null, "estimated_minutes": 5, "suggested": true}}
]
}}],
"unparseable_fragments": ["text that couldn't be parsed into tasks"]
}}"""

    text = await _text_completion(system, f"Brain dump:\n{raw_text}", max_tokens=2048)
    return _parse_json(text)
|
||||
|
||||
|
||||
|
||||
|
||||
async def generate_step_plan(task_title: str, task_description: str | None, estimated_minutes: int | None) -> list:
    """Break a single task into small ADHD-friendly steps via the LLM.

    Args:
        task_title: Short task title.
        task_description: Optional extra detail; "N/A" is sent when None.
        estimated_minutes: Optional total time estimate for the task;
            "unknown" is sent when falsy.

    Returns:
        List of step dicts with "sort_order", "title", "description" and
        "estimated_minutes" keys, as produced by the model and decoded by
        _parse_json.
    """
    est = f"{estimated_minutes} minutes" if estimated_minutes else "unknown"
    system = f"""You are an ADHD-friendly task planner.
Break this task into concrete steps of 5-15 minutes each.
Each step should be specific enough that someone with ADHD
can start immediately without decision paralysis.

Rules:
- First step should be the EASIEST (reduce activation energy)
- Steps should be independently completable
- Include time estimates per step
- Total estimated time should roughly match the task estimate
- No step longer than 15 minutes

Respond ONLY with JSON array:
[{{
"sort_order": 1,
"title": "specific action description",
"description": "additional detail if needed",
"estimated_minutes": number
}}]"""

    # Uses the default max_tokens of _text_completion (1024).
    text = await _text_completion(system, f"Task: {task_title}\nDescription: {task_description or 'N/A'}\nEstimated total: {est}")
    return _parse_json(text)
|
||||
|
||||
|
||||
async def analyze_screenshot(
    screenshot_bytes: bytes,
    window_title: str,
    task_context: dict,
    recent_summaries: list[str] | None = None,
) -> dict:
    """Legacy server-side VLM analysis. Upgraded with friction detection prompt.

    Args:
        screenshot_bytes: Raw screenshot image bytes (sent as JPEG).
        window_title: Foreground window title as reported by the OS.
        task_context: Dict with "task_title", "task_goal" and a "steps"
            list; each step dict is read for "status", "sort_order",
            "title", "id" and optionally "checkpoint_note" — TODO confirm
            this schema against the caller.
        recent_summaries: Prior per-frame summaries; the "(N*5)s ago"
            labels below assume one entry per ~5-second capture, newest
            last — confirm against the capture loop.

    Returns:
        Dict matching the JSON schema in the prompt ("on_task",
        "friction", "intent", "gentle_nudge", "vlm_summary", ...), as
        decoded by _parse_json.
    """
    # Render the task's steps as a checklist the model can reference by id.
    steps_text = ""
    for s in task_context.get("steps", []):
        cp = f' checkpoint_note="{s["checkpoint_note"]}"' if s.get("checkpoint_note") else ""
        steps_text += f' - [{s["status"]}] {s["sort_order"]}. {s["title"]} (id={s["id"]}){cp}\n'

    # Oldest summaries get the largest "ago" label.
    history_text = ""
    if recent_summaries:
        for i, summary in enumerate(recent_summaries):
            history_text += f" - [{(len(recent_summaries) - i) * 5}s ago] {summary}\n"

    system = f"""You are a proactive focus assistant analyzing a user's screen.
The user's current task and step progress:
Task: {task_context.get("task_title", "")}
Goal: {task_context.get("task_goal", "")}
Steps:
{steps_text} Window title reported by OS: {window_title}
{"Recent screen history:" + chr(10) + history_text if history_text else ""}
Analyze the current screenshot. Determine:

1. TASK STATUS: Is the user working on their task? Which step? Any steps completed?
2. CHECKPOINT: What specific within-step progress have they made?
3. FRICTION DETECTION: Is the user stuck in any of these patterns?
- REPETITIVE_LOOP: Switching between same 2-3 windows (copying data manually)
- STALLED: Same screen region with minimal changes for extended time
- TEDIOUS_MANUAL: Doing automatable work (filling forms, organizing files, transcribing)
- CONTEXT_OVERHEAD: Many windows open, visibly searching across them
- TASK_RESUMPTION: User just returned to a task they were working on earlier
4. INTENT: If viewing informational content, is the user SKIMMING, ENGAGED, or UNCLEAR?
5. PROPOSED ACTION: If friction detected, suggest a specific action the AI could take.

Respond ONLY with JSON:
{{
"on_task": boolean,
"current_step_id": "step UUID or null",
"checkpoint_note_update": "within-step progress or null",
"steps_completed": ["UUIDs"],
"friction": {{
"type": "repetitive_loop | stalled | tedious_manual | context_overhead | task_resumption | none",
"confidence": 0.0-1.0,
"description": "what the user is struggling with or null",
"proposed_actions": [
{{"label": "action description", "action_type": "auto_extract | brain_dump", "details": "specifics"}}
],
"source_context": "what info to extract from or null",
"target_context": "where to put it or null"
}},
"intent": "skimming | engaged | unclear | null",
"distraction_type": "app_switch | browsing | idle | null",
"app_name": "primary visible application",
"confidence": 0.0-1.0,
"gentle_nudge": "nudge if distracted and no friction action applies, null otherwise",
"vlm_summary": "1-sentence factual description of screen"
}}"""

    text = await _vision_completion(system, screenshot_bytes, "Analyze this screenshot.")
    return _parse_json(text)
|
||||
|
||||
|
||||
async def generate_resume_card(
    task_title: str,
    goal: str | None,
    current_step_title: str | None,
    checkpoint_note: str | None,
    completed_count: int,
    total_count: int,
    next_step_title: str | None,
    minutes_away: int,
    attention_score: int | None,
) -> dict:
    """Generate a short "welcome back" card for a user resuming a task.

    Args:
        task_title: The task being resumed.
        goal: Overall goal text, if any.
        current_step_title: Title of the in-progress step, if any.
        checkpoint_note: Within-step progress note used for specificity.
        completed_count: Number of steps already done.
        total_count: Total number of steps.
        next_step_title: Title of the step after the current one, if any.
        minutes_away: How long the user was away.
        attention_score: Attention score recorded before leaving, if any.

    Returns:
        Dict with "welcome_back", "you_were_doing", "next_step" and
        "motivation" string fields, as decoded by _parse_json.
    """
    system = """Generate a brief, encouraging context-resume card for
someone with ADHD returning to their task.
Be warm, specific, and action-oriented. No shame. No generic platitudes.
Use the checkpoint_note to give hyper-specific context about where they left off.

Respond ONLY with JSON:
{
"welcome_back": "short friendly greeting (max 8 words)",
"you_were_doing": "1 sentence referencing checkpoint_note specifically",
"next_step": "concrete next action with time estimate",
"motivation": "1 sentence encouragement (ADHD-friendly, no shame)"
}"""

    # Explicit None check: `attention_score or "N/A"` would misreport a
    # legitimate score of 0 as missing.
    score_text = attention_score if attention_score is not None else "N/A"

    user_content = f"""Inputs:
- Task: {task_title}
- Overall goal: {goal or "N/A"}
- Current step: {current_step_title or "N/A"}
- Current step checkpoint_note: {checkpoint_note or "N/A"}
- Steps completed: {completed_count} of {total_count}
- Next step after current: {next_step_title or "N/A"}
- Time away: {minutes_away} minutes
- Attention score before leaving: {score_text}"""

    text = await _text_completion(system, user_content, max_tokens=256)
    return _parse_json(text)
|
||||
|
||||
|
||||
async def generate_app_activity_nudge(
    app_name: str,
    duration_seconds: int,
    task_title: str,
    current_step_title: str | None,
    checkpoint_note: str | None,
) -> str:
    """Produce one short, shame-free nudge for a user who drifted off-task.

    Args:
        app_name: The non-work app the user switched to.
        duration_seconds: How long they have been in it.
        task_title: The focus-session task.
        current_step_title: In-progress step, if any.
        checkpoint_note: Within-step progress note, if any.

    Returns:
        The nudge text, whitespace-trimmed.
    """
    # Human-friendly duration: whole minutes once we have at least one,
    # otherwise raw seconds.
    minutes = duration_seconds // 60
    if minutes > 0:
        duration_text = f"{minutes} minute{'s' if minutes != 1 else ''}"
    else:
        duration_text = f"{duration_seconds} seconds"

    system = """Generate a single gentle, non-judgmental nudge for someone with ADHD
who drifted to a non-work app during a focus session.
Reference their specific progress to make returning easier.
No shame. Keep it under 30 words.
Respond with ONLY the nudge text, no JSON, no quotes."""

    user_content = f"""Context:
- Distraction app: {app_name}
- Time spent: {duration_text}
- Current task: {task_title}
- Current step: {current_step_title or "N/A"}
- Progress so far: {checkpoint_note or "N/A"}"""

    nudge = await _text_completion(system, user_content, max_tokens=100)
    return nudge.strip()
|
||||
|
||||
|
||||
async def suggest_work_apps(task_title: str, task_description: str | None) -> dict:
    """Ask the LLM which Apple app best fits a task.

    Args:
        task_title: Short task title.
        task_description: Optional extra detail; "N/A" is sent when None.

    Returns:
        Dict with "suggested_app_scheme" (URL scheme string) and
        "suggested_app_name" keys, as produced by the model and decoded
        by _parse_json.
    """
    system = """Given this task, suggest which Apple apps the user likely needs.
Return the most likely single app as the primary suggestion.

Respond ONLY with JSON:
{
"suggested_app_scheme": "URL scheme (e.g. mobilenotes://, x-apple-pages://, com.google.docs://)",
"suggested_app_name": "human-readable name (e.g. Notes, Pages, Google Docs)"
}"""

    text = await _text_completion(system, f"Task: {task_title}\nDescription: {task_description or 'N/A'}", max_tokens=100)
    return _parse_json(text)
|
||||
|
||||
|
||||
async def prioritize_tasks(tasks_json: list, timezone: str) -> list:
    """Ask the LLM to rank tasks using ADHD-friendly heuristics.

    Args:
        tasks_json: JSON-serializable list of task dicts; each is expected
            to carry an id the model can echo back as "task_id" — TODO
            confirm schema against the caller.
        timezone: User's timezone string, passed to the model as context.

    Returns:
        List of dicts with "task_id", "recommended_priority" (1-4) and
        "reason" keys, as decoded by _parse_json.
    """
    from datetime import datetime

    system = """You are an ADHD-friendly task prioritizer.
Consider: deadlines, estimated effort, task dependencies,
and the user's energy patterns.

Rules:
- Hard deadlines always take top priority
- Front-load quick wins (<15min) for momentum
- Group errands together
- Deprioritize tasks with no deadline and low urgency

Respond ONLY with JSON array:
[{
"task_id": "uuid",
"recommended_priority": 1-4,
"reason": "1-sentence explanation"
}]"""

    # NOTE(review): datetime.now() is the *server's* local time; the user's
    # timezone is only provided as text. Consider converting via zoneinfo
    # so "Current time" matches the user's clock — confirm intent.
    user_content = f"""Input: {json.dumps(tasks_json)}
Current time: {datetime.now().isoformat()}
User's timezone: {timezone}"""

    text = await _text_completion(system, user_content, max_tokens=512)
    return _parse_json(text)
|
||||
Reference in New Issue
Block a user