Binary pass/fail evaluation without numeric scoring
This example demonstrates binary evaluation mode, in which the judge returns a PASS/FAIL verdict instead of a numeric score.
Step 1: Add the following code to your Python file:
agent_as_judge_binary.py
Copy
Ask AI
# Binary (PASS/FAIL) agent-as-judge evaluation example.
#
# Runs a customer-service agent on one prompt, then asks an AgentAsJudgeEval
# to check the response against a professional-tone criterion and prints
# whether the check passed.
from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from agno.eval.agent_as_judge import AgentAsJudgeEval
from agno.models.openai import OpenAIResponses

# Setup database to persist eval results
db = SqliteDb(db_file="tmp/agent_as_judge_binary.db")

# The agent whose output will be judged.
agent = Agent(
    model=OpenAIResponses(id="gpt-5.2"),
    instructions="You are a customer service agent. Respond professionally.",
    db=db,
)

response = agent.run("I need help with my account")

# The judge: evaluates an output against a single natural-language criterion.
evaluation = AgentAsJudgeEval(
    name="Professional Tone Check",
    criteria="Response must maintain professional tone without informal language or slang",
    db=db,
)

# Pass the same input the agent received, plus the agent's response as text.
result = evaluation.run(
    input="I need help with my account",
    output=str(response.content),
    print_results=True,
    print_summary=True,
)

# Guard against a missing result or an empty results list before indexing,
# so a failed evaluation run is reported instead of raising here.
if result is None or not result.results:
    print("Result: no evaluation result was returned")
else:
    print(f"Result: {'PASSED' if result.results[0].passed else 'FAILED'}")