Coverage for eval_harness / scorers / exact_match.py: 100%
7 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-02 20:06 +0200
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-02 20:06 +0200
1from __future__ import annotations
3from eval_harness.scorers.base import Scorer, ScoreResult
class ExactMatchScorer(Scorer):
    """Scorer that passes only when output and expected text are identical.

    The comparison is a plain ``==`` on the two strings: nothing is
    lowercased, whitespace is not collapsed, and a partial/substring hit
    does not count. Free-form LLM answers will therefore almost never
    pass. That is intentional: this scorer serves as the honest baseline
    that motivates semantic similarity and LLM-as-judge scoring.
    """

    name = "exact_match"

    async def score(self, question: str, output: str, expected: str) -> ScoreResult:
        """Compare ``output`` to ``expected`` with strict equality.

        Args:
            question: The prompt that produced ``output``; not consulted here.
            output: The text being evaluated.
            expected: The reference text.

        Returns:
            A ``ScoreResult`` with ``score`` 1.0 and reason ``"exact match"``
            when the two strings are equal, otherwise ``score`` 0.0 and
            reason ``"outputs differ"``.
        """
        is_identical = output == expected
        if is_identical:
            verdict_score, verdict_reason = 1.0, "exact match"
        else:
            verdict_score, verdict_reason = 0.0, "outputs differ"
        return ScoreResult(
            passed=is_identical,
            score=verdict_score,
            reason=verdict_reason,
        )