352 lines
13 KiB
Python
352 lines
13 KiB
Python
|
|
import asyncio
import base64
import json
import logging

from app.config import settings
|
||
|
|
|
||
|
|
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
# ── Provider setup: prefer Anthropic, fall back to Gemini ──
# Runs once at import time: exactly one client is constructed, chosen by
# which API key is present in settings. If neither key is configured,
# _provider stays None and _check_provider() raises at call time.

_provider: str | None = None

if settings.ANTHROPIC_API_KEY:
    # Imported lazily so the SDK is only required when its key is set.
    import anthropic

    _anthropic_client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
    _provider = "anthropic"
    _model = "claude-sonnet-4-20250514"
    logger.info("LLM provider: Anthropic (Claude)")

elif settings.GEMINI_API_KEY:
    # Lazy import for the same reason as above; genai_types is used by
    # _vision_completion to build image parts.
    from google import genai
    from google.genai import types as genai_types

    _gemini_client = genai.Client(api_key=settings.GEMINI_API_KEY)
    _provider = "gemini"
    _model = "gemini-3.1-pro-preview"
    logger.info("LLM provider: Google (Gemini)")
|
||
|
|
|
||
|
|
|
||
|
|
def _parse_json(text: str) -> dict | list:
|
||
|
|
import re
|
||
|
|
text = text.strip()
|
||
|
|
# Strip markdown code fences
|
||
|
|
if text.startswith("```"):
|
||
|
|
text = text.split("\n", 1)[1]
|
||
|
|
text = text.rsplit("```", 1)[0]
|
||
|
|
# Find the first { or [ and last } or ]
|
||
|
|
start = -1
|
||
|
|
for i, c in enumerate(text):
|
||
|
|
if c in "{[":
|
||
|
|
start = i
|
||
|
|
break
|
||
|
|
if start == -1:
|
||
|
|
raise ValueError(f"No JSON found in LLM response: {text[:200]}")
|
||
|
|
end = max(text.rfind("}"), text.rfind("]"))
|
||
|
|
if end == -1:
|
||
|
|
raise ValueError(f"No closing bracket in LLM response: {text[:200]}")
|
||
|
|
json_str = text[start:end + 1]
|
||
|
|
# Strip // comments (Gemini sometimes adds these)
|
||
|
|
json_str = re.sub(r'//[^\n]*', '', json_str)
|
||
|
|
# Strip trailing commas before } or ]
|
||
|
|
json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
|
||
|
|
return json.loads(json_str)
|
||
|
|
|
||
|
|
|
||
|
|
def _check_provider() -> None:
    """Ensure an LLM backend was selected at import time; raise otherwise."""
    if _provider:
        return
    raise RuntimeError("No LLM API key configured. Set ANTHROPIC_API_KEY or GEMINI_API_KEY in .env")
|
||
|
|
|
||
|
|
|
||
|
|
async def _text_completion(system: str, user_content: str, max_tokens: int = 1024) -> str:
    """Run a text-only completion against the configured provider.

    Args:
        system: System/instruction prompt.
        user_content: User-turn content; prepended with the system text for
            Anthropic, sent as ``contents`` for Gemini.
        max_tokens: Response cap (honored by the Anthropic call only; the
            Gemini call relies on model defaults).

    Returns:
        The raw response text from the model.

    Raises:
        RuntimeError: if no provider was configured.
    """
    _check_provider()
    if _provider == "anthropic":
        # The SDK client here is synchronous; run it in a worker thread so
        # the blocking HTTP call doesn't stall the event loop.
        response = await asyncio.to_thread(
            _anthropic_client.messages.create,
            model=_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": f"{system}\n\n{user_content}"}],
        )
        return response.content[0].text
    else:
        response = await asyncio.to_thread(
            _gemini_client.models.generate_content,
            model=_model,
            config={"system_instruction": system},
            contents=user_content,
        )
        return response.text
|
||
|
|
|
||
|
|
|
||
|
|
async def _vision_completion(system: str, image_bytes: bytes, user_text: str, max_tokens: int = 512) -> str:
    """Run an image+text completion against the configured provider.

    Args:
        system: System/instruction prompt.
        image_bytes: Raw image bytes; sent with a JPEG media type, so the
            caller is expected to supply JPEG data.
        user_text: Text accompanying the image.
        max_tokens: Response cap (honored by the Anthropic call only).

    Returns:
        The raw response text from the model.

    Raises:
        RuntimeError: if no provider was configured.
    """
    _check_provider()
    if _provider == "anthropic":
        image_b64 = base64.b64encode(image_bytes).decode()
        # The SDK client here is synchronous; run it in a worker thread so
        # the blocking HTTP call doesn't stall the event loop.
        response = await asyncio.to_thread(
            _anthropic_client.messages.create,
            model=_model,
            max_tokens=max_tokens,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
                    {"type": "text", "text": f"{system}\n\n{user_text}"},
                ],
            }],
        )
        return response.content[0].text
    else:
        response = await asyncio.to_thread(
            _gemini_client.models.generate_content,
            model=_model,
            config={"system_instruction": system},
            contents=[
                genai_types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
                user_text,
            ],
        )
        return response.text
|
||
|
|
|
||
|
|
|
||
|
|
# ── Public API (unchanged signatures) ──
|
||
|
|
|
||
|
|
|
||
|
|
async def parse_brain_dump(raw_text: str, timezone: str) -> dict:
    """Turn a free-form brain dump into structured tasks with subtask steps.

    Returns the model's parsed JSON dict, expected to carry
    "parsed_tasks" and "unparseable_fragments" keys.
    """
    from datetime import datetime

    today = datetime.now().strftime("%Y-%m-%d")
    system = f"""You are a task parser and ADHD-friendly planner.
Extract structured tasks from this brain dump, then break each task into
concrete, actionable steps someone with ADHD can start immediately.

Today's date: {today}
User's timezone: {timezone}

Task extraction rules:
- Be generous with deadlines — infer from context.
- If no deadline is obvious, set priority to 0 (unset).
- Unrelated items stay as separate top-level tasks.

Step rules (applied to every task's subtasks array):
- Each step should be 5-15 minutes, specific enough to start without decision paralysis.
- First step should be the EASIEST to reduce activation energy.
- Steps explicitly mentioned in the brain dump have "suggested": false.
- Then ADD 1-3 additional steps the user likely needs but didn't mention, with "suggested": true.
Examples: "gather materials", "review before sending", "set a reminder", "test it works".
- Keep step titles short and action-oriented.
- Every task should have at least 2 steps total.

Respond ONLY with JSON, no other text.
Example:
{{
"parsed_tasks": [{{
"title": "concise task title",
"description": "any extra detail from the dump",
"deadline": "ISO 8601 or null",
"priority": "0-4 integer (0=unset, 1=low, 2=med, 3=high, 4=urgent)",
"estimated_minutes": "total for all steps or null",
"tags": ["work", "personal", "health", "errands", etc.],
"subtasks": [
{{"title": "step from the dump", "description": null, "deadline": null, "estimated_minutes": 10, "suggested": false}},
{{"title": "AI-suggested next step", "description": null, "deadline": null, "estimated_minutes": 5, "suggested": true}}
]
}}],
"unparseable_fragments": ["text that couldn't be parsed into tasks"]
}}"""

    payload = f"Brain dump:\n{raw_text}"
    text = await _text_completion(system, payload, max_tokens=2048)
    return _parse_json(text)
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
async def generate_step_plan(task_title: str, task_description: str | None, estimated_minutes: int | None) -> list:
    """Ask the LLM to break one task into small, ordered steps.

    Returns the parsed JSON list of step dicts
    (sort_order / title / description / estimated_minutes).
    """
    if estimated_minutes:
        est = f"{estimated_minutes} minutes"
    else:
        est = "unknown"

    system = """You are an ADHD-friendly task planner.
Break this task into concrete steps of 5-15 minutes each.
Each step should be specific enough that someone with ADHD
can start immediately without decision paralysis.

Rules:
- First step should be the EASIEST (reduce activation energy)
- Steps should be independently completable
- Include time estimates per step
- Total estimated time should roughly match the task estimate
- No step longer than 15 minutes

Respond ONLY with JSON array:
[{
"sort_order": 1,
"title": "specific action description",
"description": "additional detail if needed",
"estimated_minutes": number
}]"""

    prompt = (
        f"Task: {task_title}\n"
        f"Description: {task_description or 'N/A'}\n"
        f"Estimated total: {est}"
    )
    text = await _text_completion(system, prompt)
    return _parse_json(text)
|
||
|
|
|
||
|
|
|
||
|
|
async def analyze_screenshot(
    screenshot_bytes: bytes,
    window_title: str,
    task_context: dict,
    recent_summaries: list[str] | None = None,
) -> dict:
    """Legacy server-side VLM analysis. Upgraded with friction detection prompt."""
    # Render each known step as one bullet line for the prompt.
    step_lines = []
    for step in task_context.get("steps", []):
        if step.get("checkpoint_note"):
            cp = f' checkpoint_note="{step["checkpoint_note"]}"'
        else:
            cp = ""
        step_lines.append(f' - [{step["status"]}] {step["sort_order"]}. {step["title"]} (id={step["id"]}){cp}\n')
    steps_text = "".join(step_lines)

    # Render recent 5-second-interval summaries, oldest first.
    history_lines = []
    if recent_summaries:
        total = len(recent_summaries)
        for i, summary in enumerate(recent_summaries):
            history_lines.append(f" - [{(total - i) * 5}s ago] {summary}\n")
    history_text = "".join(history_lines)

    system = f"""You are a proactive focus assistant analyzing a user's screen.
The user's current task and step progress:
Task: {task_context.get("task_title", "")}
Goal: {task_context.get("task_goal", "")}
Steps:
{steps_text} Window title reported by OS: {window_title}
{"Recent screen history:" + chr(10) + history_text if history_text else ""}
Analyze the current screenshot. Determine:

1. TASK STATUS: Is the user working on their task? Which step? Any steps completed?
2. CHECKPOINT: What specific within-step progress have they made?
3. FRICTION DETECTION: Is the user stuck in any of these patterns?
- REPETITIVE_LOOP: Switching between same 2-3 windows (copying data manually)
- STALLED: Same screen region with minimal changes for extended time
- TEDIOUS_MANUAL: Doing automatable work (filling forms, organizing files, transcribing)
- CONTEXT_OVERHEAD: Many windows open, visibly searching across them
- TASK_RESUMPTION: User just returned to a task they were working on earlier
4. INTENT: If viewing informational content, is the user SKIMMING, ENGAGED, or UNCLEAR?
5. PROPOSED ACTION: If friction detected, suggest a specific action the AI could take.

Respond ONLY with JSON:
{{
"on_task": boolean,
"current_step_id": "step UUID or null",
"checkpoint_note_update": "within-step progress or null",
"steps_completed": ["UUIDs"],
"friction": {{
"type": "repetitive_loop | stalled | tedious_manual | context_overhead | task_resumption | none",
"confidence": 0.0-1.0,
"description": "what the user is struggling with or null",
"proposed_actions": [
{{"label": "action description", "action_type": "auto_extract | brain_dump", "details": "specifics"}}
],
"source_context": "what info to extract from or null",
"target_context": "where to put it or null"
}},
"intent": "skimming | engaged | unclear | null",
"distraction_type": "app_switch | browsing | idle | null",
"app_name": "primary visible application",
"confidence": 0.0-1.0,
"gentle_nudge": "nudge if distracted and no friction action applies, null otherwise",
"vlm_summary": "1-sentence factual description of screen"
}}"""

    text = await _vision_completion(system, screenshot_bytes, "Analyze this screenshot.")
    return _parse_json(text)
|
||
|
|
|
||
|
|
|
||
|
|
async def generate_resume_card(
    task_title: str,
    goal: str | None,
    current_step_title: str | None,
    checkpoint_note: str | None,
    completed_count: int,
    total_count: int,
    next_step_title: str | None,
    minutes_away: int,
    attention_score: int | None,
) -> dict:
    """Build a short "welcome back" card for a user resuming a task.

    Returns the parsed JSON dict with welcome_back / you_were_doing /
    next_step / motivation fields.
    """
    system = """Generate a brief, encouraging context-resume card for
someone with ADHD returning to their task.
Be warm, specific, and action-oriented. No shame. No generic platitudes.
Use the checkpoint_note to give hyper-specific context about where they left off.

Respond ONLY with JSON:
{
"welcome_back": "short friendly greeting (max 8 words)",
"you_were_doing": "1 sentence referencing checkpoint_note specifically",
"next_step": "concrete next action with time estimate",
"motivation": "1 sentence encouragement (ADHD-friendly, no shame)"
}"""

    detail_lines = [
        "Inputs:",
        f"- Task: {task_title}",
        f"- Overall goal: {goal or 'N/A'}",
        f"- Current step: {current_step_title or 'N/A'}",
        f"- Current step checkpoint_note: {checkpoint_note or 'N/A'}",
        f"- Steps completed: {completed_count} of {total_count}",
        f"- Next step after current: {next_step_title or 'N/A'}",
        f"- Time away: {minutes_away} minutes",
        f"- Attention score before leaving: {attention_score or 'N/A'}",
    ]
    text = await _text_completion(system, "\n".join(detail_lines), max_tokens=256)
    return _parse_json(text)
|
||
|
|
|
||
|
|
|
||
|
|
async def generate_app_activity_nudge(
    app_name: str,
    duration_seconds: int,
    task_title: str,
    current_step_title: str | None,
    checkpoint_note: str | None,
) -> str:
    """Produce one short, shame-free nudge for a user who drifted off-task.

    Returns the plain nudge text (no JSON), whitespace-stripped.
    """
    # Human-readable duration: whole minutes if >= 1 minute, else seconds.
    minutes = duration_seconds // 60
    if minutes > 0:
        unit = "minute" if minutes == 1 else "minutes"
        duration_text = f"{minutes} {unit}"
    else:
        duration_text = f"{duration_seconds} seconds"

    system = """Generate a single gentle, non-judgmental nudge for someone with ADHD
who drifted to a non-work app during a focus session.
Reference their specific progress to make returning easier.
No shame. Keep it under 30 words.
Respond with ONLY the nudge text, no JSON, no quotes."""

    context = f"""Context:
- Distraction app: {app_name}
- Time spent: {duration_text}
- Current task: {task_title}
- Current step: {current_step_title or "N/A"}
- Progress so far: {checkpoint_note or "N/A"}"""

    reply = await _text_completion(system, context, max_tokens=100)
    return reply.strip()
|
||
|
|
|
||
|
|
|
||
|
|
async def suggest_work_apps(task_title: str, task_description: str | None) -> dict:
    """Suggest the single Apple app (URL scheme + name) most useful for a task.

    Returns the parsed JSON dict with suggested_app_scheme and
    suggested_app_name fields.
    """
    system = """Given this task, suggest which Apple apps the user likely needs.
Return the most likely single app as the primary suggestion.

Respond ONLY with JSON:
{
"suggested_app_scheme": "URL scheme (e.g. mobilenotes://, x-apple-pages://, com.google.docs://)",
"suggested_app_name": "human-readable name (e.g. Notes, Pages, Google Docs)"
}"""

    prompt = f"Task: {task_title}\nDescription: {task_description or 'N/A'}"
    response_text = await _text_completion(system, prompt, max_tokens=100)
    return _parse_json(response_text)
|
||
|
|
|
||
|
|
|
||
|
|
async def prioritize_tasks(tasks_json: list, timezone: str) -> list:
    """Rank a batch of tasks and return per-task priority recommendations.

    Returns the parsed JSON list of
    {task_id, recommended_priority, reason} dicts.
    """
    from datetime import datetime

    system = """You are an ADHD-friendly task prioritizer.
Consider: deadlines, estimated effort, task dependencies,
and the user's energy patterns.

Rules:
- Hard deadlines always take top priority
- Front-load quick wins (<15min) for momentum
- Group errands together
- Deprioritize tasks with no deadline and low urgency

Respond ONLY with JSON array:
[{
"task_id": "uuid",
"recommended_priority": 1-4,
"reason": "1-sentence explanation"
}]"""

    now_iso = datetime.now().isoformat()
    user_content = (
        f"Input: {json.dumps(tasks_json)}\n"
        f"Current time: {now_iso}\n"
        f"User's timezone: {timezone}"
    )
    text = await _text_completion(system, user_content, max_tokens=512)
    return _parse_json(text)
|