API
This commit is contained in:
351
app/services/llm.py
Normal file
351
app/services/llm.py
Normal file
@@ -0,0 +1,351 @@
|
||||
import asyncio
import base64
import json
import logging

from app.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Provider setup: prefer Anthropic, fall back to Gemini ──
#
# Exactly one provider is initialized at import time based on which API key
# is present in settings. If neither key is set, _provider stays None and
# _check_provider() raises when a completion is first requested.

_provider: str | None = None

if settings.ANTHROPIC_API_KEY:
    # Imported lazily so the anthropic package is only required when used.
    import anthropic
    _anthropic_client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)
    _provider = "anthropic"
    # Pinned model snapshot for reproducible behavior.
    _model = "claude-sonnet-4-20250514"
    logger.info("LLM provider: Anthropic (Claude)")

elif settings.GEMINI_API_KEY:
    # google-genai SDK; genai_types is used by _vision_completion for
    # building image Parts.
    from google import genai
    from google.genai import types as genai_types
    _gemini_client = genai.Client(api_key=settings.GEMINI_API_KEY)
    _provider = "gemini"
    _model = "gemini-3.1-pro-preview"
    logger.info("LLM provider: Google (Gemini)")
|
||||
|
||||
|
||||
def _parse_json(text: str) -> dict | list:
|
||||
import re
|
||||
text = text.strip()
|
||||
# Strip markdown code fences
|
||||
if text.startswith("```"):
|
||||
text = text.split("\n", 1)[1]
|
||||
text = text.rsplit("```", 1)[0]
|
||||
# Find the first { or [ and last } or ]
|
||||
start = -1
|
||||
for i, c in enumerate(text):
|
||||
if c in "{[":
|
||||
start = i
|
||||
break
|
||||
if start == -1:
|
||||
raise ValueError(f"No JSON found in LLM response: {text[:200]}")
|
||||
end = max(text.rfind("}"), text.rfind("]"))
|
||||
if end == -1:
|
||||
raise ValueError(f"No closing bracket in LLM response: {text[:200]}")
|
||||
json_str = text[start:end + 1]
|
||||
# Strip // comments (Gemini sometimes adds these)
|
||||
json_str = re.sub(r'//[^\n]*', '', json_str)
|
||||
# Strip trailing commas before } or ]
|
||||
json_str = re.sub(r',\s*([}\]])', r'\1', json_str)
|
||||
return json.loads(json_str)
|
||||
|
||||
|
||||
def _check_provider():
    """Fail fast when module import found no usable LLM API key.

    Raises:
        RuntimeError: If neither Anthropic nor Gemini was configured.
    """
    if _provider is None:
        raise RuntimeError("No LLM API key configured. Set ANTHROPIC_API_KEY or GEMINI_API_KEY in .env")
|
||||
|
||||
|
||||
async def _text_completion(system: str, user_content: str, max_tokens: int = 1024) -> str:
    """Run a text-only completion against the active provider.

    Args:
        system: System / instruction prompt.
        user_content: User message body.
        max_tokens: Response cap (applied on the Anthropic path only).

    Returns:
        The model's text response.

    Raises:
        RuntimeError: If no provider is configured.
    """
    _check_provider()
    if _provider == "anthropic":
        # The SDK client here is synchronous; run the blocking HTTP call
        # in a worker thread so it doesn't stall the event loop.
        response = await asyncio.to_thread(
            _anthropic_client.messages.create,
            model=_model,
            max_tokens=max_tokens,
            # The system prompt is folded into the user turn for parity
            # with the Gemini path's single-string contents.
            messages=[{"role": "user", "content": f"{system}\n\n{user_content}"}],
        )
        return response.content[0].text
    else:
        # generate_content is likewise a blocking call — offload it.
        response = await asyncio.to_thread(
            _gemini_client.models.generate_content,
            model=_model,
            config={"system_instruction": system},
            contents=user_content,
        )
        return response.text
|
||||
|
||||
|
||||
async def _vision_completion(system: str, image_bytes: bytes, user_text: str, max_tokens: int = 512) -> str:
    """Run an image + text completion against the active provider.

    Args:
        system: System / instruction prompt.
        image_bytes: Raw image bytes; sent with a JPEG media type, so the
            caller is expected to provide JPEG data.
        user_text: Text accompanying the image.
        max_tokens: Response cap (applied on the Anthropic path only).

    Returns:
        The model's text response.

    Raises:
        RuntimeError: If no provider is configured.
    """
    _check_provider()
    if _provider == "anthropic":
        # Anthropic expects base64-encoded image data.
        image_b64 = base64.b64encode(image_bytes).decode()
        # Synchronous SDK call — offload to a worker thread so the
        # event loop stays responsive.
        response = await asyncio.to_thread(
            _anthropic_client.messages.create,
            model=_model,
            max_tokens=max_tokens,
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_b64}},
                    {"type": "text", "text": f"{system}\n\n{user_text}"},
                ],
            }],
        )
        return response.content[0].text
    else:
        # Gemini accepts raw bytes directly via Part.from_bytes; the call
        # is still blocking, so offload it as well.
        response = await asyncio.to_thread(
            _gemini_client.models.generate_content,
            model=_model,
            config={"system_instruction": system},
            contents=[
                genai_types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg"),
                user_text,
            ],
        )
        return response.text
|
||||
|
||||
|
||||
# ── Public API (unchanged signatures) ──
|
||||
|
||||
|
||||
async def parse_brain_dump(raw_text: str, timezone: str) -> dict:
    """Parse a free-form brain dump into structured tasks with subtasks.

    Args:
        raw_text: The user's unstructured text.
        timezone: User's timezone name (presumably IANA, e.g.
            "America/New_York") used for deadline inference and to compute
            "today" from the user's perspective.

    Returns:
        Dict with "parsed_tasks" (list of task dicts, each carrying a
        "subtasks" array) and "unparseable_fragments" (list of strings),
        as produced by the model and decoded by _parse_json.
    """
    from datetime import datetime

    # Compute "today" in the *user's* timezone — the server's local date
    # can differ from the user's around midnight, which would skew every
    # inferred deadline. Fall back to server time for unknown zone names.
    try:
        from zoneinfo import ZoneInfo
        today = datetime.now(ZoneInfo(timezone)).strftime("%Y-%m-%d")
    except Exception:
        today = datetime.now().strftime("%Y-%m-%d")

    system = f"""You are a task parser and ADHD-friendly planner.
Extract structured tasks from this brain dump, then break each task into
concrete, actionable steps someone with ADHD can start immediately.

Today's date: {today}
User's timezone: {timezone}

Task extraction rules:
- Be generous with deadlines — infer from context.
- If no deadline is obvious, set priority to 0 (unset).
- Unrelated items stay as separate top-level tasks.

Step rules (applied to every task's subtasks array):
- Each step should be 5-15 minutes, specific enough to start without decision paralysis.
- First step should be the EASIEST to reduce activation energy.
- Steps explicitly mentioned in the brain dump have "suggested": false.
- Then ADD 1-3 additional steps the user likely needs but didn't mention, with "suggested": true.
Examples: "gather materials", "review before sending", "set a reminder", "test it works".
- Keep step titles short and action-oriented.
- Every task should have at least 2 steps total.

Respond ONLY with JSON, no other text.
Example:
{{
"parsed_tasks": [{{
"title": "concise task title",
"description": "any extra detail from the dump",
"deadline": "ISO 8601 or null",
"priority": "0-4 integer (0=unset, 1=low, 2=med, 3=high, 4=urgent)",
"estimated_minutes": "total for all steps or null",
"tags": ["work", "personal", "health", "errands", etc.],
"subtasks": [
{{"title": "step from the dump", "description": null, "deadline": null, "estimated_minutes": 10, "suggested": false}},
{{"title": "AI-suggested next step", "description": null, "deadline": null, "estimated_minutes": 5, "suggested": true}}
]
}}],
"unparseable_fragments": ["text that couldn't be parsed into tasks"]
}}"""

    text = await _text_completion(system, f"Brain dump:\n{raw_text}", max_tokens=2048)
    return _parse_json(text)
|
||||
|
||||
|
||||
|
||||
|
||||
async def generate_step_plan(task_title: str, task_description: str | None, estimated_minutes: int | None) -> list:
    """Break a single task into small ADHD-friendly steps via the LLM.

    Args:
        task_title: Short task title.
        task_description: Optional extra detail; "N/A" is sent when None.
        estimated_minutes: Optional total time estimate for the task;
            "unknown" is sent when falsy.

    Returns:
        List of step dicts with "sort_order", "title", "description" and
        "estimated_minutes" keys, as produced by the model and decoded by
        _parse_json.
    """
    est = f"{estimated_minutes} minutes" if estimated_minutes else "unknown"
    system = f"""You are an ADHD-friendly task planner.
Break this task into concrete steps of 5-15 minutes each.
Each step should be specific enough that someone with ADHD
can start immediately without decision paralysis.

Rules:
- First step should be the EASIEST (reduce activation energy)
- Steps should be independently completable
- Include time estimates per step
- Total estimated time should roughly match the task estimate
- No step longer than 15 minutes

Respond ONLY with JSON array:
[{{
"sort_order": 1,
"title": "specific action description",
"description": "additional detail if needed",
"estimated_minutes": number
}}]"""

    # Uses the default max_tokens of _text_completion (1024).
    text = await _text_completion(system, f"Task: {task_title}\nDescription: {task_description or 'N/A'}\nEstimated total: {est}")
    return _parse_json(text)
|
||||
|
||||
|
||||
async def analyze_screenshot(
    screenshot_bytes: bytes,
    window_title: str,
    task_context: dict,
    recent_summaries: list[str] | None = None,
) -> dict:
    """Legacy server-side VLM analysis. Upgraded with friction detection prompt.

    Args:
        screenshot_bytes: Raw screenshot image bytes (sent as JPEG).
        window_title: Foreground window title as reported by the OS.
        task_context: Dict with "task_title", "task_goal" and a "steps"
            list; each step dict is read for "status", "sort_order",
            "title", "id" and optionally "checkpoint_note" — TODO confirm
            this schema against the caller.
        recent_summaries: Prior per-frame summaries; the "(N*5)s ago"
            labels below assume one entry per ~5-second capture, newest
            last — confirm against the capture loop.

    Returns:
        Dict matching the JSON schema in the prompt ("on_task",
        "friction", "intent", "gentle_nudge", "vlm_summary", ...), as
        decoded by _parse_json.
    """
    # Render the task's steps as a checklist the model can reference by id.
    steps_text = ""
    for s in task_context.get("steps", []):
        cp = f' checkpoint_note="{s["checkpoint_note"]}"' if s.get("checkpoint_note") else ""
        steps_text += f' - [{s["status"]}] {s["sort_order"]}. {s["title"]} (id={s["id"]}){cp}\n'

    # Oldest summaries get the largest "ago" label.
    history_text = ""
    if recent_summaries:
        for i, summary in enumerate(recent_summaries):
            history_text += f" - [{(len(recent_summaries) - i) * 5}s ago] {summary}\n"

    system = f"""You are a proactive focus assistant analyzing a user's screen.
The user's current task and step progress:
Task: {task_context.get("task_title", "")}
Goal: {task_context.get("task_goal", "")}
Steps:
{steps_text} Window title reported by OS: {window_title}
{"Recent screen history:" + chr(10) + history_text if history_text else ""}
Analyze the current screenshot. Determine:

1. TASK STATUS: Is the user working on their task? Which step? Any steps completed?
2. CHECKPOINT: What specific within-step progress have they made?
3. FRICTION DETECTION: Is the user stuck in any of these patterns?
- REPETITIVE_LOOP: Switching between same 2-3 windows (copying data manually)
- STALLED: Same screen region with minimal changes for extended time
- TEDIOUS_MANUAL: Doing automatable work (filling forms, organizing files, transcribing)
- CONTEXT_OVERHEAD: Many windows open, visibly searching across them
- TASK_RESUMPTION: User just returned to a task they were working on earlier
4. INTENT: If viewing informational content, is the user SKIMMING, ENGAGED, or UNCLEAR?
5. PROPOSED ACTION: If friction detected, suggest a specific action the AI could take.

Respond ONLY with JSON:
{{
"on_task": boolean,
"current_step_id": "step UUID or null",
"checkpoint_note_update": "within-step progress or null",
"steps_completed": ["UUIDs"],
"friction": {{
"type": "repetitive_loop | stalled | tedious_manual | context_overhead | task_resumption | none",
"confidence": 0.0-1.0,
"description": "what the user is struggling with or null",
"proposed_actions": [
{{"label": "action description", "action_type": "auto_extract | brain_dump", "details": "specifics"}}
],
"source_context": "what info to extract from or null",
"target_context": "where to put it or null"
}},
"intent": "skimming | engaged | unclear | null",
"distraction_type": "app_switch | browsing | idle | null",
"app_name": "primary visible application",
"confidence": 0.0-1.0,
"gentle_nudge": "nudge if distracted and no friction action applies, null otherwise",
"vlm_summary": "1-sentence factual description of screen"
}}"""

    text = await _vision_completion(system, screenshot_bytes, "Analyze this screenshot.")
    return _parse_json(text)
|
||||
|
||||
|
||||
async def generate_resume_card(
    task_title: str,
    goal: str | None,
    current_step_title: str | None,
    checkpoint_note: str | None,
    completed_count: int,
    total_count: int,
    next_step_title: str | None,
    minutes_away: int,
    attention_score: int | None,
) -> dict:
    """Generate a short "welcome back" card for a user resuming a task.

    Args:
        task_title: The task being resumed.
        goal: Overall goal text, if any.
        current_step_title: Title of the in-progress step, if any.
        checkpoint_note: Within-step progress note used for specificity.
        completed_count: Number of steps already done.
        total_count: Total number of steps.
        next_step_title: Title of the step after the current one, if any.
        minutes_away: How long the user was away.
        attention_score: Attention score recorded before leaving, if any.

    Returns:
        Dict with "welcome_back", "you_were_doing", "next_step" and
        "motivation" string fields, as decoded by _parse_json.
    """
    system = """Generate a brief, encouraging context-resume card for
someone with ADHD returning to their task.
Be warm, specific, and action-oriented. No shame. No generic platitudes.
Use the checkpoint_note to give hyper-specific context about where they left off.

Respond ONLY with JSON:
{
"welcome_back": "short friendly greeting (max 8 words)",
"you_were_doing": "1 sentence referencing checkpoint_note specifically",
"next_step": "concrete next action with time estimate",
"motivation": "1 sentence encouragement (ADHD-friendly, no shame)"
}"""

    # Explicit None check: `attention_score or "N/A"` would misreport a
    # legitimate score of 0 as missing.
    score_text = attention_score if attention_score is not None else "N/A"

    user_content = f"""Inputs:
- Task: {task_title}
- Overall goal: {goal or "N/A"}
- Current step: {current_step_title or "N/A"}
- Current step checkpoint_note: {checkpoint_note or "N/A"}
- Steps completed: {completed_count} of {total_count}
- Next step after current: {next_step_title or "N/A"}
- Time away: {minutes_away} minutes
- Attention score before leaving: {score_text}"""

    text = await _text_completion(system, user_content, max_tokens=256)
    return _parse_json(text)
|
||||
|
||||
|
||||
async def generate_app_activity_nudge(
    app_name: str,
    duration_seconds: int,
    task_title: str,
    current_step_title: str | None,
    checkpoint_note: str | None,
) -> str:
    """Produce one short, shame-free nudge for a user who drifted off-task.

    Args:
        app_name: The non-work app the user switched to.
        duration_seconds: How long they have been in it.
        task_title: The focus-session task.
        current_step_title: In-progress step, if any.
        checkpoint_note: Within-step progress note, if any.

    Returns:
        The nudge text, whitespace-trimmed.
    """
    # Human-friendly duration: whole minutes once we have at least one,
    # otherwise raw seconds.
    minutes = duration_seconds // 60
    if minutes > 0:
        duration_text = f"{minutes} minute{'s' if minutes != 1 else ''}"
    else:
        duration_text = f"{duration_seconds} seconds"

    system = """Generate a single gentle, non-judgmental nudge for someone with ADHD
who drifted to a non-work app during a focus session.
Reference their specific progress to make returning easier.
No shame. Keep it under 30 words.
Respond with ONLY the nudge text, no JSON, no quotes."""

    user_content = f"""Context:
- Distraction app: {app_name}
- Time spent: {duration_text}
- Current task: {task_title}
- Current step: {current_step_title or "N/A"}
- Progress so far: {checkpoint_note or "N/A"}"""

    nudge = await _text_completion(system, user_content, max_tokens=100)
    return nudge.strip()
|
||||
|
||||
|
||||
async def suggest_work_apps(task_title: str, task_description: str | None) -> dict:
    """Ask the LLM which Apple app best fits a task.

    Args:
        task_title: Short task title.
        task_description: Optional extra detail; "N/A" is sent when None.

    Returns:
        Dict with "suggested_app_scheme" (URL scheme string) and
        "suggested_app_name" keys, as produced by the model and decoded
        by _parse_json.
    """
    system = """Given this task, suggest which Apple apps the user likely needs.
Return the most likely single app as the primary suggestion.

Respond ONLY with JSON:
{
"suggested_app_scheme": "URL scheme (e.g. mobilenotes://, x-apple-pages://, com.google.docs://)",
"suggested_app_name": "human-readable name (e.g. Notes, Pages, Google Docs)"
}"""

    text = await _text_completion(system, f"Task: {task_title}\nDescription: {task_description or 'N/A'}", max_tokens=100)
    return _parse_json(text)
|
||||
|
||||
|
||||
async def prioritize_tasks(tasks_json: list, timezone: str) -> list:
    """Ask the LLM to rank tasks using ADHD-friendly heuristics.

    Args:
        tasks_json: JSON-serializable list of task dicts; each is expected
            to carry an id the model can echo back as "task_id" — TODO
            confirm schema against the caller.
        timezone: User's timezone string, passed to the model as context.

    Returns:
        List of dicts with "task_id", "recommended_priority" (1-4) and
        "reason" keys, as decoded by _parse_json.
    """
    from datetime import datetime

    system = """You are an ADHD-friendly task prioritizer.
Consider: deadlines, estimated effort, task dependencies,
and the user's energy patterns.

Rules:
- Hard deadlines always take top priority
- Front-load quick wins (<15min) for momentum
- Group errands together
- Deprioritize tasks with no deadline and low urgency

Respond ONLY with JSON array:
[{
"task_id": "uuid",
"recommended_priority": 1-4,
"reason": "1-sentence explanation"
}]"""

    # NOTE(review): datetime.now() is the *server's* local time; the user's
    # timezone is only provided as text. Consider converting via zoneinfo
    # so "Current time" matches the user's clock — confirm intent.
    user_content = f"""Input: {json.dumps(tasks_json)}
Current time: {datetime.now().isoformat()}
User's timezone: {timezone}"""

    text = await _text_completion(system, user_content, max_tokens=512)
    return _parse_json(text)
|
||||
Reference in New Issue
Block a user