Coverage for app / main.py: 100%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-19 12:53 +0200

1""" 

2FastAPI endpoint that proxies questions to a local Ollama LLM. 

3 

4Prerequisites (macOS): 

5 brew install ollama 

6 ollama serve # starts the local server on :11434 

7 ollama pull llama3.2 # downloads a small, fast model (~2 GB) 

8""" 

9 

10from fastapi import FastAPI, HTTPException 

11from pydantic import BaseModel 

12import httpx 

13import logging 

14import time 

15 

# Logger scoped to this module's dotted path so handlers configured for
# "app" (or "app.main") pick these records up.
logger = logging.getLogger("app.main")

# ASGI application object; the title shows up in the auto-generated docs.
app = FastAPI(title="Local LLM API")

19 

20 

21def _preview(s: str, n: int = 60) -> str: 

22 s = " ".join(s.split()) 

23 return s if len(s) <= n else s[: n - 1] + "…" 

24 

# Endpoint of the locally running Ollama server (non-streaming generate API).
OLLAMA_URL = "http://localhost:11434/api/generate"
# Model tag passed to Ollama; must already be pulled (`ollama pull llama3.2`).
MODEL = "llama3.2"
# Generous request timeout: the first request after `ollama serve` starts
# pays the model-load cost, which can take a while on modest hardware.
TIMEOUT_SECONDS = 120

# Simple keyword list for the "refuse harmful prompts" requirement.
# A real system would use a classifier or moderation model.
HARMFUL_KEYWORDS = [
    "how to make a bomb",
    "how to hack",
    "how to kill",
    "create malware",
    "synthesize drugs",
    "build a weapon",
]

39 

40 

class QuestionRequest(BaseModel):
    """Request body for POST /ask."""

    # The user's free-text question; emptiness is validated in the handler.
    question: str

43 

44 

class AnswerResponse(BaseModel):
    """Response body for POST /ask."""

    # The model's answer text (stripped of surrounding whitespace).
    answer: str
    # Which model produced the answer.
    model: str
    # Wall-clock time spent waiting on Ollama, rounded by the handler.
    elapsed_seconds: float

49 

50 

def _is_harmful(text: str) -> bool:
    """Return True when *text* contains any blocked keyword (case-insensitive)."""
    haystack = text.lower()
    for keyword in HARMFUL_KEYWORDS:
        if keyword in haystack:
            return True
    return False

54 

55 

@app.post("/ask", response_model=AnswerResponse)
async def ask(req: QuestionRequest):
    """Proxy a question to the local Ollama server and return its answer.

    Raises HTTPException with:
        422 — the question is empty or whitespace-only,
        400 — the question matches a harmful keyword and is refused,
        503 — the Ollama server cannot be reached,
        504 — Ollama did not answer within TIMEOUT_SECONDS,
        502 — Ollama returned an error status, invalid JSON, or an empty answer.
    """
    preview = _preview(req.question)

    if not req.question or not req.question.strip():
        logger.info("POST /ask prompt=%r verdict=empty status=422", preview)
        raise HTTPException(status_code=422, detail="Question must not be empty.")

    if _is_harmful(req.question):
        logger.warning("POST /ask prompt=%r verdict=refused status=400", preview)
        raise HTTPException(status_code=400, detail="This prompt has been refused.")

    start = time.perf_counter()
    try:
        async with httpx.AsyncClient(timeout=TIMEOUT_SECONDS) as client:
            resp = await client.post(
                OLLAMA_URL,
                json={"model": MODEL, "prompt": req.question, "stream": False},
            )
            resp.raise_for_status()
    except httpx.ConnectError as exc:
        logger.error("POST /ask prompt=%r verdict=ollama_unreachable status=503", preview)
        raise HTTPException(
            status_code=503,
            detail="Cannot reach Ollama. Is `ollama serve` running?",
        ) from exc
    except httpx.TimeoutException as exc:
        # Bug fix: timeouts were previously unhandled and surfaced as an
        # opaque 500 — map them to a proper gateway timeout instead.
        logger.error("POST /ask prompt=%r verdict=ollama_timeout status=504", preview)
        raise HTTPException(
            status_code=504,
            detail=f"Ollama did not respond within {TIMEOUT_SECONDS} seconds.",
        ) from exc
    except httpx.HTTPStatusError as exc:
        logger.error("POST /ask prompt=%r verdict=ollama_error status=502 body=%r",
                     preview, exc.response.text[:120])
        raise HTTPException(status_code=502, detail=f"Ollama error: {exc.response.text}") from exc

    elapsed = time.perf_counter() - start
    try:
        data = resp.json()
    except ValueError as exc:
        # Robustness fix: a non-JSON 2xx body previously crashed the handler.
        logger.error("POST /ask prompt=%r verdict=bad_json status=502 elapsed=%.3fs",
                     preview, elapsed)
        raise HTTPException(status_code=502, detail="Ollama returned invalid JSON.") from exc
    answer_text = data.get("response", "").strip()

    if not answer_text:
        logger.error("POST /ask prompt=%r verdict=empty_answer status=502 elapsed=%.3fs",
                     preview, elapsed)
        raise HTTPException(status_code=502, detail="Ollama returned an empty response.")

    logger.info(
        "POST /ask prompt=%r verdict=allowed status=200 elapsed=%.3fs answer=%r",
        preview, elapsed, _preview(answer_text, 80),
    )
    return AnswerResponse(answer=answer_text, model=MODEL, elapsed_seconds=round(elapsed, 3))

101 

102 

@app.get("/health")
async def health():
    """Quick liveness check: always returns an ok status with no dependencies."""
    return {"status": "ok"}