Coverage for eval_harness / scorers / exact_match.py: 100%

7 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-02 20:06 +0200

1from __future__ import annotations 

2 

3from eval_harness.scorers.base import Scorer, ScoreResult 

4 

5 

class ExactMatchScorer(Scorer):
    """Baseline scorer that passes only when output equals expected exactly.

    The comparison is a plain ``==`` on the two values: nothing is
    lowercased, whitespace is not collapsed, and a substring hit does not
    count. Free-form LLM answers will almost never pass, which is the
    intent; this scorer is the honest baseline that motivates semantic
    similarity and LLM-as-judge scoring.
    """

    name = "exact_match"

    async def score(self, question: str, output: str, expected: str) -> ScoreResult:
        """Compare ``output`` against ``expected`` and report the verdict.

        ``question`` is part of the signature but is not consulted by the
        comparison.

        Returns a ``ScoreResult`` with ``score=1.0`` and reason
        ``"exact match"`` when the two values compare equal, otherwise
        ``score=0.0`` and reason ``"outputs differ"``. ``passed`` carries
        the result of the equality check itself.
        """
        is_identical = output == expected
        if is_identical:
            points = 1.0
            explanation = "exact match"
        else:
            points = 0.0
            explanation = "outputs differ"
        return ScoreResult(passed=is_identical, score=points, reason=explanation)