Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -8869,6 +8869,133 @@ def test_create_evaluation_set_with_agent_data(
assert candidate_response["candidate"] == "test-candidate"
assert candidate_response["agent_data"] == agent_data

@mock.patch.object(_evals_common, "evals")
@mock.patch.object(_evals_common, "_gcs_utils")
def test_create_evaluation_set_injects_agents_map_from_agent_info(
    self, mock_gcs_utils, mock_evals_module
):
    """Verifies the agents map is injected from agent_info when agent_data has none."""
    # agent_data as produced by remote agent-engine inference: turns are
    # present but there is no top-level "agents" map.
    raw_agent_data = {
        "turns": [
            {
                "turn_index": 0,
                "turn_id": "turn_0",
                "events": [
                    {
                        "author": "my_agent",
                        "content": {
                            "parts": [{"text": "hello"}],
                            "role": "model",
                        },
                    }
                ],
            }
        ]
    }
    dataframe = pd.DataFrame(
        [{"prompt": "test prompt", "agent_data": raw_agent_data}]
    )

    parsed_info = vertexai_genai_types.evals.AgentInfo(
        name="my_agent",
        agents={
            "my_agent": vertexai_genai_types.evals.AgentConfig(
                agent_id="my_agent",
                instruction="You are a helpful agent.",
            )
        },
        root_agent_id="my_agent",
    )

    gcs_mock = mock_gcs_utils.GcsUtils.return_value
    gcs_mock.upload_json_to_prefix.return_value = "gs://bucket/path/request.json"

    evals_mock = mock_evals_module.Evals.return_value
    eval_item_mock = mock.Mock()
    eval_item_mock.name = "eval_item_1"
    evals_mock.create_evaluation_item.return_value = eval_item_mock
    evals_mock.create_evaluation_set.return_value = mock.Mock()

    _evals_common._create_evaluation_set_from_dataframe(
        api_client=self.mock_api_client,
        gcs_dest_prefix="gs://bucket/prefix",
        eval_df=dataframe,
        candidate_name="test-candidate",
        parsed_agent_info=parsed_info,
    )

    # Inspect the JSON payload handed to the GCS upload helper.
    payload = gcs_mock.upload_json_to_prefix.call_args.kwargs["data"]
    agent_data_out = payload["candidate_responses"][0]["agent_data"]

    # The agents map from agent_info should have been injected verbatim.
    assert "agents" in agent_data_out
    assert "my_agent" in agent_data_out["agents"]
    instruction = agent_data_out["agents"]["my_agent"]["instruction"]
    assert instruction == "You are a helpful agent."

@mock.patch.object(_evals_common, "evals")
@mock.patch.object(_evals_common, "_gcs_utils")
def test_create_evaluation_set_preserves_existing_agents_map(
    self, mock_gcs_utils, mock_evals_module
):
    """Verifies a pre-existing agents map in agent_data is never overwritten."""
    # agent_data that already carries its own "agents" map.
    raw_agent_data = {
        "turns": [{"turn_id": "turn1", "events": []}],
        "agents": {
            "original_agent": {
                "agent_id": "original_agent",
                "instruction": "original instruction",
            }
        },
    }
    dataframe = pd.DataFrame(
        [{"prompt": "test prompt", "agent_data": raw_agent_data}]
    )

    # agent_info deliberately describes a different agent; it must not
    # replace the map already present in agent_data.
    parsed_info = vertexai_genai_types.evals.AgentInfo(
        name="different_agent",
        agents={
            "different_agent": vertexai_genai_types.evals.AgentConfig(
                agent_id="different_agent",
                instruction="different instruction",
            )
        },
        root_agent_id="different_agent",
    )

    gcs_mock = mock_gcs_utils.GcsUtils.return_value
    gcs_mock.upload_json_to_prefix.return_value = "gs://bucket/path/request.json"

    evals_mock = mock_evals_module.Evals.return_value
    eval_item_mock = mock.Mock()
    eval_item_mock.name = "eval_item_1"
    evals_mock.create_evaluation_item.return_value = eval_item_mock
    evals_mock.create_evaluation_set.return_value = mock.Mock()

    _evals_common._create_evaluation_set_from_dataframe(
        api_client=self.mock_api_client,
        gcs_dest_prefix="gs://bucket/prefix",
        eval_df=dataframe,
        candidate_name="test-candidate",
        parsed_agent_info=parsed_info,
    )

    # Inspect the JSON payload handed to the GCS upload helper.
    payload = gcs_mock.upload_json_to_prefix.call_args.kwargs["data"]
    agent_data_out = payload["candidate_responses"][0]["agent_data"]

    # Original agents map should be preserved, not overwritten.
    assert "original_agent" in agent_data_out["agents"]
    assert "different_agent" not in agent_data_out["agents"]

@mock.patch.object(_evals_common, "evals")
@mock.patch.object(_evals_common, "_gcs_utils")
def test_create_evaluation_set_with_history_column(
Expand Down
20 changes: 17 additions & 3 deletions vertexai/_genai/_evals_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ def _resolve_dataset(
dest,
eval_df,
candidate_name,
parsed_agent_info=parsed_agent_info,
)
dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
return dataset
Expand Down Expand Up @@ -2845,6 +2846,7 @@ def _create_evaluation_set_from_dataframe(
gcs_dest_prefix: str,
eval_df: pd.DataFrame,
candidate_name: Optional[str] = None,
parsed_agent_info: Optional[types.evals.AgentInfo] = None,
) -> Union[types.EvaluationSet, Any]:
"""Converts a dataframe to an EvaluationSet."""
eval_item_requests = []
Expand Down Expand Up @@ -2877,16 +2879,28 @@ def _create_evaluation_set_from_dataframe(
elif isinstance(agent_data_val, types.evals.AgentData):
agent_data_obj = agent_data_val

# When agent_data exists but has no agents map (e.g. from remote
# agent_engine inference), inject the agents map from agent_info so
# the server-side autorater can access tool definitions and
# instructions.
if (
agent_data_obj
and not agent_data_obj.agents
and parsed_agent_info
and parsed_agent_info.agents
):
agent_data_obj.agents = parsed_agent_info.agents

candidate_responses = []
if _evals_constant.RESPONSE in row or agent_data_obj or intermediate_events:
# Resolve the oneof conflict: prioritize agent_data over flat text
response_text = row.get(_evals_constant.RESPONSE) or None

if agent_data_obj and response_text:
logger.info(
"Both 'response' and 'agent_data' columns found in the evaluation dataset. "
"Prioritizing 'agent_data' and omitting 'response' text to satisfy "
"CandidateResponse protobuf oneof constraints."
"Both 'response' and 'agent_data' columns found in the evaluation"
" dataset. Prioritizing 'agent_data' and omitting 'response' text"
" to satisfy CandidateResponse protobuf oneof constraints."
)
response_text = None

Expand Down
Loading