Coverage for eval_harness/providers/ollama.py: 88%
26 statements
coverage.py v7.13.5, created at 2026-05-02 20:06 +0200
from __future__ import annotations

import logging

import httpx

from eval_harness.providers.base import Provider

logger = logging.getLogger(__name__)


class OllamaError(RuntimeError):
    """Raised when the Ollama backend returns an error or unexpected payload."""


class OllamaProvider(Provider):
    """Ollama `/api/generate` adapter.

    Streaming is disabled (`stream=False`) so a single JSON response carries
    the full completion under the `response` key. Sampling params are passed
    through Ollama's `options` object; defaults here are conservative for
    eval reproducibility (low temperature), but callers can override.
    """

    def __init__(
        self,
        model: str = "llama3.2",
        base_url: str = "http://localhost:11434",
        timeout: float = 60.0,
        temperature: float = 0.2,
    ) -> None:
        self.model = model
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.temperature = temperature

    async def generate(self, prompt: str) -> str:
        url = f"{self.base_url}/api/generate"
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": self.temperature},
        }
        logger.debug("ollama.generate", extra={"model": self.model, "url": url})
        async with httpx.AsyncClient(timeout=self.timeout) as client:
            try:
                resp = await client.post(url, json=payload)
                resp.raise_for_status()
            except httpx.HTTPError as e:
                raise OllamaError(f"Ollama request failed: {e}") from e

        data = resp.json()
        if "response" not in data:
            raise OllamaError(f"Ollama response missing 'response' key: {data!r}")
        return data["response"]
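For reference, a minimal usage sketch of the provider above, assuming an Ollama server is listening on the default localhost:11434 with the llama3.2 model pulled; the prompt and temperature are illustrative, not part of the module.

import asyncio

from eval_harness.providers.ollama import OllamaProvider


async def main() -> None:
    # temperature=0.0 for maximally deterministic eval runs (an assumption,
    # overriding the module's 0.2 default)
    provider = OllamaProvider(model="llama3.2", temperature=0.0)
    completion = await provider.generate("Reply with exactly one word: hello")
    print(completion)


asyncio.run(main())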
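The raise paths in generate() are the likeliest source of the missing coverage: 88% of 26 statements leaves roughly three statements unexercised, which would match the except clause and the two OllamaError raises, though the report excerpt does not show which lines are missed. A hypothetical pytest sketch that would exercise both branches, assuming pytest-asyncio and respx are installed; the test names are illustrative.

import httpx
import pytest
import respx

from eval_harness.providers.ollama import OllamaError, OllamaProvider

GENERATE_URL = "http://localhost:11434/api/generate"


@pytest.mark.asyncio
@respx.mock
async def test_http_error_raises_ollama_error():
    # A 500 makes raise_for_status() throw httpx.HTTPStatusError (a subclass
    # of httpx.HTTPError), which the provider wraps in OllamaError.
    respx.post(GENERATE_URL).mock(return_value=httpx.Response(500))
    with pytest.raises(OllamaError):
        await OllamaProvider().generate("ping")


@pytest.mark.asyncio
@respx.mock
async def test_missing_response_key_raises_ollama_error():
    # A 200 payload without the 'response' key trips the schema check.
    respx.post(GENERATE_URL).mock(
        return_value=httpx.Response(200, json={"done": True})
    )
    with pytest.raises(OllamaError):
        await OllamaProvider().generate("ping")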