"""LLM provider abstraction: prefers Anthropic (Claude), falls back to Google Gemini."""
import asyncio
import base64
import json
import logging

from app.config import settings
logger = logging.getLogger(__name__)
# ── Provider setup: prefer Anthropic, fall back to Gemini ──
_provider: str | None = None
if settings.ANTHROPIC_API_KEY:
import anthropic
_anthropic_client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
_provider = "anthropic"
_model = "claude-sonnet-4-20250514"
logger.info("LLM provider: Anthropic (Claude)")
elif settings.GEMINI_API_KEY:
from google import genai
from google.genai import types as genai_types
_gemini_client = genai.Client(api_key=settings.GEMINI_API_KEY)
_provider = "gemini"
_model = "gemini-3.1-pro-preview"
logger.info("LLM provider: Google (Gemini)")
def _parse_json(text: str) -> dict | list:
import re
text = text.strip()
# Strip markdown code fences
if text.startswith("```"):
text = text.split("\n", 1)[1]
text = text.rsplit("```", 1)[0]
# Find the first { or [ and last } or ]
start = -1
for i, c in enumerate(text):
if c in "{[":
start = i
break
if start == -1:
raise ValueError(f"No JSON found in LLM response: {text[:200]}")
end = max(text.rfind("}"), text.rfind("]"))
if end == -1:
raise ValueError(f"No closing bracket in LLM response: {text[:200]}")
json_str = text[start:end + 1]
# Strip // comments (Gemini sometimes adds these)
json_str = re.sub(r'//[^\n]*', '', json_str)
# Strip trailing commas before } or ]
json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
return json.loads(json_str)
def _check_provider():
if not _provider:
raise RuntimeError("No LLM API key configured. Set ANTHROPIC_API_KEY or GEMINI_API_KEY in .env")
async def _text_completion(system: str, user_content: str, max_tokens: int = 1024) -> str:
_check_provider()
if _provider == "anthropic":
response = _anthropic_client.messages.create(
model=_model,
max_tokens=max_tokens,
messages=[{"role": "user", "content": f"{system}\n\n{user_content}"}],
)
return response.content[0].text
else:
response = _gemini_client.models.generate_content(
model=_model,
config={"system_instruction": system},
contents=user_content,
)
return response.text
async def _vision_completion(system: str, image_bytes: bytes, user_text: str, max_tokens: int = 512) -> str:
_check_provider()
if _provider == "anthropic":
image_b64 = base64.b64encode(image_bytes).decode()
response = _anthropic_client.messages.create(
model=_model,
max_tokens=max_tokens,
messages=[{
"role": "user",
"content": [
{"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
{"type": "text", "text": f"{system}\n\n{user_text}"},
],
}],
)
return response.content[0].text
else:
response = _gemini_client.models.generate_content(
model=_model,
config={"system_instruction": system},
contents=[
genai_types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
user_text,
],
)
return response.text
# ── Public API (unchanged signatures) ──
async def parse_brain_dump(raw_text: str, timezone: str) -> dict:
from datetime import datetime
system = f"""You are a task parser and ADHD-friendly planner.
Extract structured tasks from this brain dump, then break each task into
concrete, actionable steps someone with ADHD can start immediately.
Today's date: {datetime.now().strftime("%Y-%m-%d")}
User's timezone: {timezone}
Task extraction rules:
- Be generous with deadlines infer from context.
- If no deadline is obvious, set priority to 0 (unset).
- Unrelated items stay as separate top-level tasks.
Step rules (applied to every task's subtasks array):
- Each step should be 5-15 minutes, specific enough to start without decision paralysis.
- First step should be the EASIEST to reduce activation energy.
- Steps explicitly mentioned in the brain dump have "suggested": false.
- Then ADD 1-3 additional steps the user likely needs but didn't mention, with "suggested": true.
Examples: "gather materials", "review before sending", "set a reminder", "test it works".
- Keep step titles short and action-oriented.
- Every task should have at least 2 steps total.
Respond ONLY with JSON, no other text.
Example:
{{
"parsed_tasks": [{{
"title": "concise task title",
"description": "any extra detail from the dump",
"deadline": "ISO 8601 or null",
"priority": "0-4 integer (0=unset, 1=low, 2=med, 3=high, 4=urgent)",
"estimated_minutes": "total for all steps or null",
"tags": ["work", "personal", "health", "errands", etc.],
"subtasks": [
{{"title": "step from the dump", "description": null, "deadline": null, "estimated_minutes": 10, "suggested": false}},
{{"title": "AI-suggested next step", "description": null, "deadline": null, "estimated_minutes": 5, "suggested": true}}
]
}}],
"unparseable_fragments": ["text that couldn't be parsed into tasks"]
}}"""
text = await _text_completion(system, f"Brain dump:\n{raw_text}", max_tokens=2048)
return _parse_json(text)
async def generate_step_plan(task_title: str, task_description: str | None, estimated_minutes: int | None) -> list:
est = f"{estimated_minutes} minutes" if estimated_minutes else "unknown"
system = f"""You are an ADHD-friendly task planner.
Break this task into concrete steps of 5-15 minutes each.
Each step should be specific enough that someone with ADHD
can start immediately without decision paralysis.
Rules:
- First step should be the EASIEST (reduce activation energy)
- Steps should be independently completable
- Include time estimates per step
- Total estimated time should roughly match the task estimate
- No step longer than 15 minutes
Respond ONLY with JSON array:
[{{
"sort_order": 1,
"title": "specific action description",
"description": "additional detail if needed",
"estimated_minutes": number
}}]"""
text = await _text_completion(system, f"Task: {task_title}\nDescription: {task_description or 'N/A'}\nEstimated total: {est}")
return _parse_json(text)
async def analyze_screenshot(
screenshot_bytes: bytes,
window_title: str,
task_context: dict,
recent_summaries: list[str] | None = None,
) -> dict:
"""Legacy server-side VLM analysis. Upgraded with friction detection prompt."""
steps_text = ""
for s in task_context.get("steps", []):
cp = f' checkpoint_note="{s["checkpoint_note"]}"' if s.get("checkpoint_note") else ""
steps_text += f' - [{s["status"]}] {s["sort_order"]}. {s["title"]} (id={s["id"]}){cp}\n'
history_text = ""
if recent_summaries:
for i, summary in enumerate(recent_summaries):
history_text += f" - [{(len(recent_summaries) - i) * 5}s ago] {summary}\n"
system = f"""You are a proactive focus assistant analyzing a user's screen.
The user's current task and step progress:
Task: {task_context.get("task_title", "")}
Goal: {task_context.get("task_goal", "")}
Steps:
{steps_text} Window title reported by OS: {window_title}
{"Recent screen history:" + chr(10) + history_text if history_text else ""}
Analyze the current screenshot. Determine:
1. TASK STATUS: Is the user working on their task? Which step? Any steps completed?
2. CHECKPOINT: What specific within-step progress have they made?
3. FRICTION DETECTION: Is the user stuck in any of these patterns?
- REPETITIVE_LOOP: Switching between same 2-3 windows (copying data manually)
- STALLED: Same screen region with minimal changes for extended time
- TEDIOUS_MANUAL: Doing automatable work (filling forms, organizing files, transcribing)
- CONTEXT_OVERHEAD: Many windows open, visibly searching across them
- TASK_RESUMPTION: User just returned to a task they were working on earlier
4. INTENT: If viewing informational content, is the user SKIMMING, ENGAGED, or UNCLEAR?
5. PROPOSED ACTION: If friction detected, suggest a specific action the AI could take.
Respond ONLY with JSON:
{{
"on_task": boolean,
"current_step_id": "step UUID or null",
"checkpoint_note_update": "within-step progress or null",
"steps_completed": ["UUIDs"],
"friction": {{
"type": "repetitive_loop | stalled | tedious_manual | context_overhead | task_resumption | none",
"confidence": 0.0-1.0,
"description": "what the user is struggling with or null",
"proposed_actions": [
{{"label": "action description", "action_type": "auto_extract | brain_dump", "details": "specifics"}}
],
"source_context": "what info to extract from or null",
"target_context": "where to put it or null"
}},
"intent": "skimming | engaged | unclear | null",
"distraction_type": "app_switch | browsing | idle | null",
"app_name": "primary visible application",
"confidence": 0.0-1.0,
"gentle_nudge": "nudge if distracted and no friction action applies, null otherwise",
"vlm_summary": "1-sentence factual description of screen"
}}"""
text = await _vision_completion(system, screenshot_bytes, "Analyze this screenshot.")
return _parse_json(text)
async def generate_resume_card(
task_title: str,
goal: str | None,
current_step_title: str | None,
checkpoint_note: str | None,
completed_count: int,
total_count: int,
next_step_title: str | None,
minutes_away: int,
attention_score: int | None,
) -> dict:
system = """Generate a brief, encouraging context-resume card for
someone with ADHD returning to their task.
Be warm, specific, and action-oriented. No shame. No generic platitudes.
Use the checkpoint_note to give hyper-specific context about where they left off.
Respond ONLY with JSON:
{
"welcome_back": "short friendly greeting (max 8 words)",
"you_were_doing": "1 sentence referencing checkpoint_note specifically",
"next_step": "concrete next action with time estimate",
"motivation": "1 sentence encouragement (ADHD-friendly, no shame)"
}"""
user_content = f"""Inputs:
- Task: {task_title}
- Overall goal: {goal or "N/A"}
- Current step: {current_step_title or "N/A"}
- Current step checkpoint_note: {checkpoint_note or "N/A"}
- Steps completed: {completed_count} of {total_count}
- Next step after current: {next_step_title or "N/A"}
- Time away: {minutes_away} minutes
- Attention score before leaving: {attention_score or "N/A"}"""
text = await _text_completion(system, user_content, max_tokens=256)
return _parse_json(text)
async def generate_app_activity_nudge(
app_name: str,
duration_seconds: int,
task_title: str,
current_step_title: str | None,
checkpoint_note: str | None,
) -> str:
minutes = duration_seconds // 60
duration_text = f"{minutes} minute{'s' if minutes != 1 else ''}" if minutes > 0 else f"{duration_seconds} seconds"
system = """Generate a single gentle, non-judgmental nudge for someone with ADHD
who drifted to a non-work app during a focus session.
Reference their specific progress to make returning easier.
No shame. Keep it under 30 words.
Respond with ONLY the nudge text, no JSON, no quotes."""
user_content = f"""Context:
- Distraction app: {app_name}
- Time spent: {duration_text}
- Current task: {task_title}
- Current step: {current_step_title or "N/A"}
- Progress so far: {checkpoint_note or "N/A"}"""
return (await _text_completion(system, user_content, max_tokens=100)).strip()
async def suggest_work_apps(task_title: str, task_description: str | None) -> dict:
system = """Given this task, suggest which Apple apps the user likely needs.
Return the most likely single app as the primary suggestion.
Respond ONLY with JSON:
{
"suggested_app_scheme": "URL scheme (e.g. mobilenotes://, x-apple-pages://, com.google.docs://)",
"suggested_app_name": "human-readable name (e.g. Notes, Pages, Google Docs)"
}"""
text = await _text_completion(system, f"Task: {task_title}\nDescription: {task_description or 'N/A'}", max_tokens=100)
return _parse_json(text)
async def prioritize_tasks(tasks_json: list, timezone: str) -> list:
from datetime import datetime
system = """You are an ADHD-friendly task prioritizer.
Consider: deadlines, estimated effort, task dependencies,
and the user's energy patterns.
Rules:
- Hard deadlines always take top priority
- Front-load quick wins (<15min) for momentum
- Group errands together
- Deprioritize tasks with no deadline and low urgency
Respond ONLY with JSON array:
[{
"task_id": "uuid",
"recommended_priority": 1-4,
"reason": "1-sentence explanation"
}]"""
user_content = f"""Input: {json.dumps(tasks_json)}
Current time: {datetime.now().isoformat()}
User's timezone: {timezone}"""
text = await _text_completion(system, user_content, max_tokens=512)
return _parse_json(text)