Coverage for app / main.py: 100%
54 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-19 12:53 +0200
1"""
2FastAPI endpoint that proxies questions to a local Ollama LLM.
4Prerequisites (macOS):
5 brew install ollama
6 ollama serve # starts the local server on :11434
7 ollama pull llama3.2 # downloads a small, fast model (~2 GB)
8"""
10from fastapi import FastAPI, HTTPException
11from pydantic import BaseModel
12import httpx
13import logging
14import time
# Module-level logger; named explicitly so logging config can target "app.main".
logger = logging.getLogger("app.main")

# The ASGI application object served by uvicorn (or any ASGI server).
app = FastAPI(title="Local LLM API")
21def _preview(s: str, n: int = 60) -> str:
22 s = " ".join(s.split())
23 return s if len(s) <= n else s[: n - 1] + "…"
# Non-streaming "generate" endpoint of the local Ollama server.
OLLAMA_URL = "http://localhost:11434/api/generate"
# Model tag sent to Ollama; must already be pulled (`ollama pull llama3.2`).
MODEL = "llama3.2"
TIMEOUT_SECONDS = 120  # generous for first-run cold starts

# Simple keyword list for the "refuse harmful prompts" requirement.
# A real system would use a classifier or moderation model.
# Matching is case-insensitive substring containment (see _is_harmful).
HARMFUL_KEYWORDS = [
    "how to make a bomb",
    "how to hack",
    "how to kill",
    "create malware",
    "synthesize drugs",
    "build a weapon",
]
class QuestionRequest(BaseModel):
    """Request body for ``POST /ask``."""

    # The natural-language question forwarded verbatim to the model.
    question: str
class AnswerResponse(BaseModel):
    """Response body for ``POST /ask``."""

    # The model's completion text (whitespace-stripped before returning).
    answer: str
    # Name of the Ollama model that produced the answer.
    model: str
    # Wall-clock duration of the upstream call, rounded to 3 decimal places.
    elapsed_seconds: float
def _is_harmful(text: str) -> bool:
    """Return True if *text* contains any blocked keyword (case-insensitive).

    Plain substring matching against HARMFUL_KEYWORDS — intentionally crude;
    see the comment on the keyword list.
    """
    haystack = text.lower()
    for keyword in HARMFUL_KEYWORDS:
        if keyword in haystack:
            return True
    return False
@app.post("/ask", response_model=AnswerResponse)
async def ask(req: QuestionRequest):
    """Proxy *req.question* to the local Ollama model and return its answer.

    Error mapping:
      * 422 — empty / whitespace-only question
      * 400 — prompt matched the harmful-keyword list
      * 503 — Ollama server unreachable
      * 504 — Ollama did not answer within TIMEOUT_SECONDS
      * 502 — Ollama returned an HTTP error or an empty completion
    """
    preview = _preview(req.question)

    # Validate before spending time on the upstream call.
    if not req.question or not req.question.strip():
        logger.info("POST /ask prompt=%r verdict=empty status=422", preview)
        raise HTTPException(status_code=422, detail="Question must not be empty.")

    if _is_harmful(req.question):
        logger.warning("POST /ask prompt=%r verdict=refused status=400", preview)
        raise HTTPException(status_code=400, detail="This prompt has been refused.")

    start = time.perf_counter()
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT_SECONDS) as client:
            resp = await client.post(
                OLLAMA_URL,
                json={"model": MODEL, "prompt": req.question, "stream": False},
            )
            resp.raise_for_status()
    except httpx.ConnectError as exc:
        logger.error("POST /ask prompt=%r verdict=ollama_unreachable status=503", preview)
        raise HTTPException(
            status_code=503,
            detail="Cannot reach Ollama. Is `ollama serve` running?",
        ) from exc
    except httpx.TimeoutException as exc:
        # Fix: timeouts previously escaped this handler and surfaced as an
        # unhandled 500 — TimeoutException is not a subclass of ConnectError.
        logger.error("POST /ask prompt=%r verdict=ollama_timeout status=504", preview)
        raise HTTPException(
            status_code=504,
            detail=f"Ollama did not respond within {TIMEOUT_SECONDS} seconds.",
        ) from exc
    except httpx.HTTPStatusError as exc:
        logger.error("POST /ask prompt=%r verdict=ollama_error status=502 body=%r",
                     preview, exc.response.text[:120])
        raise HTTPException(status_code=502, detail=f"Ollama error: {exc.response.text}") from exc

    elapsed = time.perf_counter() - start
    data = resp.json()
    answer_text = data.get("response", "").strip()

    # An empty completion is treated as an upstream failure, not a 200.
    if not answer_text:
        logger.error("POST /ask prompt=%r verdict=empty_answer status=502 elapsed=%.3fs",
                     preview, elapsed)
        raise HTTPException(status_code=502, detail="Ollama returned an empty response.")

    logger.info(
        "POST /ask prompt=%r verdict=allowed status=200 elapsed=%.3fs answer=%r",
        preview, elapsed, _preview(answer_text, 80),
    )
    return AnswerResponse(answer=answer_text, model=MODEL, elapsed_seconds=round(elapsed, 3))
@app.get("/health")
async def health():
    """Liveness probe: always reports the service as up."""
    payload = {"status": "ok"}
    return payload