def multiple_scores(outputs: dict, reference_outputs: dict) -> list[dict]:
# Replace with real evaluation logic.
precision = 0.8
recall = 0.9
f1 = 0.85
return [
{"key": "precision", "score": precision},
{"key": "recall", "score": recall},
{"key": "f1", "score": f1},
]