Coverage for eval_harness/providers/ollama.py: 88% (26 statements)


from __future__ import annotations

import logging

import httpx

from eval_harness.providers.base import Provider

logger = logging.getLogger(__name__)


class OllamaError(RuntimeError):
    """Raised when the Ollama backend returns an error or unexpected payload."""


class OllamaProvider(Provider):
    """Ollama `/api/generate` adapter.

    Streaming is disabled (`stream=False`) so a single JSON response carries
    the full completion under the `response` key. Sampling params are passed
    through Ollama's `options` object; the defaults here are conservative for
    eval reproducibility (low temperature), but callers can override them.
    """

    def __init__(
        self,
        model: str = "llama3.2",
        base_url: str = "http://localhost:11434",
        timeout: float = 60.0,
        temperature: float = 0.2,
    ) -> None:
        self.model = model
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.temperature = temperature

    async def generate(self, prompt: str) -> str:
        url = f"{self.base_url}/api/generate"
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": self.temperature},
        }
        logger.debug("ollama.generate", extra={"model": self.model, "url": url})
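        # Illustrative request/response shapes for the non-streaming endpoint
        # (values are examples; Ollama also returns timing/token metadata
        # alongside "response" and "done"):
        #   -> {"model": "...", "prompt": "...", "stream": false, "options": {"temperature": 0.2}}
        #   <- {"model": "...", "response": "<full completion>", "done": true, ...}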
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            try:
                resp = await client.post(url, json=payload)
                resp.raise_for_status()
            except httpx.HTTPError as e:
                raise OllamaError(f"Ollama request failed: {e}") from e

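        # Both transport failures and non-2xx statuses surface as subclasses
        # of httpx.HTTPError, so the single except arm above normalizes them
        # into OllamaError. The non-streaming body is fully read by post(),
        # which is why resp.json() is safe after the client context closes.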
        data = resp.json()
        if "response" not in data:
            raise OllamaError(f"Ollama response missing 'response' key: {data!r}")
        return data["response"]
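
A minimal usage sketch, assuming a local Ollama server on the default port
with the llama3.2 model pulled (the prompt and temperature are illustrative):

    import asyncio

    from eval_harness.providers.ollama import OllamaProvider


    async def main() -> None:
        provider = OllamaProvider(model="llama3.2", temperature=0.0)
        completion = await provider.generate("Name the capital of France.")
        print(completion)


    asyncio.run(main())

The statements behind the 88% figure that remain uncovered are plausibly the
error branches; a test sketch for the missing-key branch, using pytest-asyncio
and respx to stub the endpoint (both are assumed test dependencies, and the
test name is hypothetical):

    import httpx
    import pytest
    import respx

    from eval_harness.providers.ollama import OllamaError, OllamaProvider


    @pytest.mark.asyncio
    @respx.mock
    async def test_generate_rejects_payload_missing_response_key() -> None:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(200, json={"done": True})
        )
        with pytest.raises(OllamaError):
            await OllamaProvider().generate("ping")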