Coverage for app / main.py: 100%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-19 12:53 +0200

1""" 

2FastAPI endpoint that proxies questions to a local Ollama LLM. 

3 

4Prerequisites (macOS): 

5 brew install ollama 

6 ollama serve # starts the local server on :11434 

7 ollama pull llama3.2 # downloads a small, fast model (~2 GB) 

8""" 

9 

10from fastapi import FastAPI, HTTPException 

11from pydantic import BaseModel 

12import httpx 

13import logging 

14import time 

15 

# Logger scoped to this module's dotted path so handlers configured for
# "app" (or "app.main") pick these records up.
logger = logging.getLogger("app.main")

# ASGI application object; the title shows up in the auto-generated docs.
app = FastAPI(title="Local LLM API")

19 

20 

21def _preview(s: str, n: int = 60) -> str: 

22 s = " ".join(s.split()) 

23 return s if len(s) <= n else s[: n - 1] + "…" 

24 

# Endpoint of the locally running Ollama server (non-streaming generate API).
OLLAMA_URL = "http://localhost:11434/api/generate"
# Model tag passed to Ollama; must already be pulled (`ollama pull llama3.2`).
MODEL = "llama3.2"
# Generous request timeout: the first request after `ollama serve` starts
# pays the model-load cost, which can take a while on modest hardware.
TIMEOUT_SECONDS = 120

# Simple keyword list for the "refuse harmful prompts" requirement.
# A real system would use a classifier or moderation model.
HARMFUL_KEYWORDS = [
    "how to make a bomb",
    "how to hack",
    "how to kill",
    "create malware",
    "synthesize drugs",
    "build a weapon",
]

39 

40 

class QuestionRequest(BaseModel):
    """Request body for POST /ask."""

    # The user's free-text question; emptiness is validated in the handler.
    question: str

43 

44 

class AnswerResponse(BaseModel):
    """Response body for POST /ask."""

    # The model's answer text (stripped of surrounding whitespace).
    answer: str
    # Which model produced the answer.
    model: str
    # Wall-clock time spent waiting on Ollama, rounded by the handler.
    elapsed_seconds: float

49 

50 

def _is_harmful(text: str) -> bool:
    """Return True when *text* contains any blocked keyword (case-insensitive)."""
    haystack = text.lower()
    for keyword in HARMFUL_KEYWORDS:
        if keyword in haystack:
            return True
    return False

54 

55 

@app.post("/ask", response_model=AnswerResponse)
async def ask(req: QuestionRequest):
    """Proxy a question to the local Ollama server and return its answer.

    Raises HTTPException with:
        422 — the question is empty or whitespace-only,
        400 — the question matches a harmful keyword and is refused,
        503 — the Ollama server cannot be reached,
        504 — Ollama did not answer within TIMEOUT_SECONDS,
        502 — Ollama returned an error status, invalid JSON, or an empty answer.
    """
    preview = _preview(req.question)

    if not req.question or not req.question.strip():
        logger.info("POST /ask prompt=%r verdict=empty status=422", preview)
        raise HTTPException(status_code=422, detail="Question must not be empty.")

    if _is_harmful(req.question):
        logger.warning("POST /ask prompt=%r verdict=refused status=400", preview)
        raise HTTPException(status_code=400, detail="This prompt has been refused.")

    start = time.perf_counter()
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT_SECONDS) as client:
            resp = await client.post(
                OLLAMA_URL,
                json={"model": MODEL, "prompt": req.question, "stream": False},
            )
            resp.raise_for_status()
    except httpx.ConnectError as exc:
        logger.error("POST /ask prompt=%r verdict=ollama_unreachable status=503", preview)
        raise HTTPException(
            status_code=503,
            detail="Cannot reach Ollama. Is `ollama serve` running?",
        ) from exc
    except httpx.TimeoutException as exc:
        # Bug fix: timeouts were previously unhandled and surfaced as an
        # opaque 500 — map them to a proper gateway timeout instead.
        logger.error("POST /ask prompt=%r verdict=ollama_timeout status=504", preview)
        raise HTTPException(
            status_code=504,
            detail=f"Ollama did not respond within {TIMEOUT_SECONDS} seconds.",
        ) from exc
    except httpx.HTTPStatusError as exc:
        logger.error("POST /ask prompt=%r verdict=ollama_error status=502 body=%r",
                     preview, exc.response.text[:120])
        raise HTTPException(status_code=502, detail=f"Ollama error: {exc.response.text}") from exc

    elapsed = time.perf_counter() - start
    try:
        data = resp.json()
    except ValueError as exc:
        # Robustness fix: a non-JSON 2xx body previously crashed the handler.
        logger.error("POST /ask prompt=%r verdict=bad_json status=502 elapsed=%.3fs",
                     preview, elapsed)
        raise HTTPException(status_code=502, detail="Ollama returned invalid JSON.") from exc
    answer_text = data.get("response", "").strip()

    if not answer_text:
        logger.error("POST /ask prompt=%r verdict=empty_answer status=502 elapsed=%.3fs",
                     preview, elapsed)
        raise HTTPException(status_code=502, detail="Ollama returned an empty response.")

    logger.info(
        "POST /ask prompt=%r verdict=allowed status=200 elapsed=%.3fs answer=%r",
        preview, elapsed, _preview(answer_text, 80),
    )
    return AnswerResponse(answer=answer_text, model=MODEL, elapsed_seconds=round(elapsed, 3))

101 

102 

@app.get("/health")
async def health():
    """Quick liveness check: always returns an ok status with no dependencies."""
    return {"status": "ok"}