From ecdc7fdd335382fd46fc1d45979965c0f74be45b Mon Sep 17 00:00:00 2001
From: cthurston-clgx
Date: Tue, 5 May 2026 12:00:51 -0500
Subject: [PATCH] fix(evaluation): populate developer_instructions when
 invocation_events is empty (#5593)

The rubric_based_final_response_quality_v1 evaluator fails to pass
developer_instructions to the judge when the agent makes zero tool calls
(empty invocation_events list). This occurs because the agent name is
resolved exclusively from invocation_events[0].author, with no fallback
for the zero-event case.

This is critical for evaluating out-of-scope rejection behavior where an
agent correctly declines a request without calling any tools. The judge
receives an empty block and cannot validate rubrics that reference the
system prompt.

Fix: When invocation_events is empty, fall back to the first agent name
in app_details.agent_details to resolve developer_instructions. This
mirrors how hallucinations_v1.py handles the same scenario.

Fixes https://github.com/google/adk-python/issues/5593
---
 .../rubric_based_final_response_quality_v1.py | 17 ++++-
 ..._rubric_based_final_response_quality_v1.py | 63 +++++++++++++++++++
 2 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/src/google/adk/evaluation/rubric_based_final_response_quality_v1.py b/src/google/adk/evaluation/rubric_based_final_response_quality_v1.py
index df01aba4ff..f0317a1576 100644
--- a/src/google/adk/evaluation/rubric_based_final_response_quality_v1.py
+++ b/src/google/adk/evaluation/rubric_based_final_response_quality_v1.py
@@ -289,14 +289,25 @@
     app_details = actual_invocation.app_details
 
     if app_details:
+      # Determine agent name from invocation events if available,
+      # otherwise fall back to the first agent in app_details.
+      # This ensures developer_instructions are populated even when
+      # the agent makes zero tool calls (e.g., declining out-of-scope
+      # requests).
+      agent_name = None
       if (
           isinstance(actual_invocation.intermediate_data, InvocationEvents)
           and actual_invocation.intermediate_data.invocation_events
       ):
+        agent_name = (
+            actual_invocation.intermediate_data.invocation_events[0].author
+        )
+      elif app_details.agent_details:
+        agent_name = next(iter(app_details.agent_details))
+
+      if agent_name:
         developer_instructions = app_details.get_developer_instructions(
-            agent_name=actual_invocation.intermediate_data.invocation_events[
-                0
-            ].author
+            agent_name=agent_name
         )
 
         tool_declarations = get_tool_declarations_as_json_str(app_details)
diff --git a/tests/unittests/evaluation/test_rubric_based_final_response_quality_v1.py b/tests/unittests/evaluation/test_rubric_based_final_response_quality_v1.py
index e100f9c06a..708f749f79 100644
--- a/tests/unittests/evaluation/test_rubric_based_final_response_quality_v1.py
+++ b/tests/unittests/evaluation/test_rubric_based_final_response_quality_v1.py
@@ -222,3 +222,66 @@ def test_format_auto_rater_prompt_with_intermediate_data_no_tools(
     evaluator: RubricBasedFinalResponseQualityV1Evaluator,
 ):
   prompt = evaluator.format_auto_rater_prompt(invocation, None)
 
   assert "No intermediate steps were taken." in prompt
+
+
+def test_format_auto_rater_prompt_with_app_details_empty_invocation_events(
+    evaluator: RubricBasedFinalResponseQualityV1Evaluator,
+):
+  """Tests that developer_instructions are populated even when invocation_events is empty.
+
+  This covers the case where an agent declines a request without calling any
+  tools (e.g., out-of-scope rejection), resulting in zero invocation events.
+  The judge should still receive the developer instructions to evaluate rubrics
+  that reference the system prompt.
+  """
+  app_details = AppDetails(
+      agent_details={
+          "my_agent": AgentDetails(
+              name="my_agent",
+              instructions="Only answer questions about cooking. Decline all other requests.",
+              tool_declarations=[],
+          )
+      },
+  )
+  invocation = Invocation(
+      user_content=genai_types.Content(
+          parts=[genai_types.Part(text="What is the capital of France?")]
+      ),
+      final_response=genai_types.Content(
+          parts=[genai_types.Part(text="I can only help with cooking.")]
+      ),
+      app_details=app_details,
+      intermediate_data=InvocationEvents(invocation_events=[]),
+  )
+  prompt = evaluator.format_auto_rater_prompt(invocation, None)
+
+  assert "Only answer questions about cooking." in prompt
+  assert "I can only help with cooking." in prompt
+
+
+def test_format_auto_rater_prompt_with_app_details_no_intermediate_data(
+    evaluator: RubricBasedFinalResponseQualityV1Evaluator,
+):
+  """Tests that developer_instructions are populated when intermediate_data is None."""
+  app_details = AppDetails(
+      agent_details={
+          "my_agent": AgentDetails(
+              name="my_agent",
+              instructions="Agent instructions here.",
+              tool_declarations=[],
+          )
+      },
+  )
+  invocation = Invocation(
+      user_content=genai_types.Content(
+          parts=[genai_types.Part(text="User input here.")]
+      ),
+      final_response=genai_types.Content(
+          parts=[genai_types.Part(text="Final agent response.")]
+      ),
+      app_details=app_details,
+      intermediate_data=None,
+  )
+  prompt = evaluator.format_auto_rater_prompt(invocation, None)
+
+  assert "Agent instructions here." in prompt