From 657f379dc516a1db75eca1686731b7a4944a3b41 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Mon, 11 May 2026 22:50:17 -0700 Subject: [PATCH] feat: GenAI Client(evals) - Add red_teaming_config support for create_evaluation_run PiperOrigin-RevId: 914075746 --- .../replays/test_create_evaluation_run.py | 30 ++++ tests/unit/vertexai/genai/test_evals.py | 83 +++++++++ vertexai/_genai/_evals_utils.py | 14 ++ vertexai/_genai/evals.py | 33 ++++ vertexai/_genai/types/__init__.py | 30 ++++ vertexai/_genai/types/common.py | 162 ++++++++++++++++++ 6 files changed, 352 insertions(+) diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index 4faf4ab0f2..e70f2a8d54 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -614,6 +614,7 @@ def test_create_eval_run_with_metric_resource_name(mock_uuid4, client): # assert eval_item.evaluation_request.candidate_responses == [] # assert evaluation_run.error is None + # def test_create_eval_run_data_source_evaluation_dataset_with_agent_info_and_prompt_template_data( # client, # ): @@ -708,6 +709,35 @@ def test_create_eval_run_with_metric_resource_name(mock_uuid4, client): # == INPUT_DF_WITH_CONTEXT_AND_HISTORY.iloc[i]["response"] # ) # assert evaluation_run.error is None +def test_create_eval_run_with_red_teaming_config(client): + """Tests that create_evaluation_run() with red_teaming_config sends analysisConfigs.""" + evaluation_run = client.evals.create_evaluation_run( + name="test_red_teaming", + display_name="test_red_teaming", + dataset=types.EvaluationRunDataSource(evaluation_set=EVAL_SET_NAME), + dest=GCS_DEST, + metrics=[], + red_teaming_config=types.RedTeamingAnalysisConfig( + attack_categories=["FINANCIAL_OR_CREDENTIAL_PHISHING"], + vulnerable_tools=[ + types.VulnerableTool( + tool_name="search_flights", + json_paths=["$.flights[0].description"], + ), + ], + ), + ) + assert isinstance(evaluation_run, types.EvaluationRun) + assert evaluation_run.display_name == "test_red_teaming" + assert evaluation_run.state == types.EvaluationRunState.PENDING + assert evaluation_run.analysis_configs is not None + assert len(evaluation_run.analysis_configs) == 1 + rt_config = evaluation_run.analysis_configs[0].red_teaming_analysis_config + assert rt_config.attack_categories == ["FINANCIAL_OR_CREDENTIAL_PHISHING"] + assert rt_config.vulnerable_tools[0].tool_name == "search_flights" + assert evaluation_run.error is None + + pytest_plugins = ("pytest_asyncio",) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 77e6180340..10d2b9144d 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -1835,6 +1835,89 @@ def test_loss_analysis_metrics_accepts_metric_object(self): assert result[0].candidate == "agent-1" +class TestRedTeamingTypes: + """Unit tests for red teaming type definitions.""" + + def test_red_teaming_analysis_config_construction(self): + config = common_types.RedTeamingAnalysisConfig( + attack_categories=["FINANCIAL_OR_CREDENTIAL_PHISHING"], + vulnerable_tools=[ + common_types.VulnerableTool( + tool_name="search_flights", + json_paths=["$.flights[0].description"], + ), + ], + ) + assert len(config.attack_categories) == 1 + assert config.vulnerable_tools[0].tool_name == "search_flights" + + def test_red_teaming_analysis_config_optional_fields(self): + config = common_types.RedTeamingAnalysisConfig() + assert config.attack_categories is None + assert config.vulnerable_tools is None + + def test_evaluation_run_results_has_red_teaming_results(self): + results = common_types.EvaluationRunResults( + red_teaming_analysis_results=[ + common_types.RedTeamingAnalysisResult( + category_results=[ + common_types.AttackCategoryResult( + attack_category="FINANCIAL_OR_CREDENTIAL_PHISHING", + attack_success_rate=0.9, + ), + ], + ) + ], + ) + assert len(results.red_teaming_analysis_results) == 1 + assert ( + results.red_teaming_analysis_results[0] + .category_results[0] + .attack_success_rate + == 0.9 + ) + + def test_create_params_accepts_analysis_configs(self): + params = common_types._CreateEvaluationRunParameters( + name="test-run", + analysis_configs=[ + common_types.AnalysisConfig( + red_teaming_analysis_config=common_types.RedTeamingAnalysisConfig( + attack_categories=["FINANCIAL_OR_CREDENTIAL_PHISHING"], + ), + ), + ], + ) + assert len(params.analysis_configs) == 1 + + +class TestResolveRedTeamingConfig: + """Unit tests for _resolve_red_teaming_config.""" + + def test_none_when_no_config(self): + result = _evals_utils._resolve_red_teaming_config() + assert result is None + + def test_wraps_config_in_analysis_configs(self): + config = common_types.RedTeamingAnalysisConfig( + attack_categories=["FINANCIAL_OR_CREDENTIAL_PHISHING"], + ) + result = _evals_utils._resolve_red_teaming_config(config) + assert len(result) == 1 + assert isinstance(result[0], common_types.AnalysisConfig) + assert ( + result[0].red_teaming_analysis_config.attack_categories[0] + == "FINANCIAL_OR_CREDENTIAL_PHISHING" + ) + + def test_accepts_dict_input(self): + result = _evals_utils._resolve_red_teaming_config( + {"attack_categories": ["INJECTED_HOSTILITY_AND_HARASSMENT"]} + ) + assert len(result) == 1 + assert isinstance(result[0], common_types.AnalysisConfig) + + class TestResolveMetricName: """Unit tests for _resolve_metric_name.""" diff --git a/vertexai/_genai/_evals_utils.py b/vertexai/_genai/_evals_utils.py index feb24bfbf1..97bc2d478d 100644 --- a/vertexai/_genai/_evals_utils.py +++ b/vertexai/_genai/_evals_utils.py @@ -541,6 +541,20 @@ def _resolve_eval_run_loss_configs( return configs +def _resolve_red_teaming_config( + red_teaming_config: Optional[types.RedTeamingAnalysisConfigOrDict] = None, +) -> Optional[list[types.AnalysisConfig]]: + """Wraps a RedTeamingAnalysisConfig into analysis_configs for the API.""" + if not red_teaming_config: + return None + config = ( + types.RedTeamingAnalysisConfig.model_validate(red_teaming_config) + if isinstance(red_teaming_config, dict) + else red_teaming_config + ) + return [types.AnalysisConfig(red_teaming_analysis_config=config)] + + def _resolve_loss_analysis_config( eval_result: types.EvaluationResult, config: Optional[types.LossAnalysisConfig] = None, diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 9ca6dcdbca..2064432204 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -130,6 +130,13 @@ def _CreateEvaluationRunParameters_to_vertex( if getv(from_object, ["config"]) is not None: setv(to_object, ["config"], getv(from_object, ["config"])) + if getv(from_object, ["analysis_configs"]) is not None: + setv( + to_object, + ["analysisConfigs"], + [item for item in getv(from_object, ["analysis_configs"])], + ) + return to_object @@ -603,6 +610,13 @@ def _EvaluationRun_from_vertex( if getv(from_object, ["labels"]) is not None: setv(to_object, ["labels"], getv(from_object, ["labels"])) + if getv(from_object, ["analysisConfigs"]) is not None: + setv( + to_object, + ["analysis_configs"], + [item for item in getv(from_object, ["analysisConfigs"])], + ) + return to_object @@ -1159,6 +1173,7 @@ def _create_evaluation_run( dict[str, types.EvaluationRunInferenceConfigOrDict] ] = None, config: Optional[types.CreateEvaluationRunConfigOrDict] = None, + analysis_configs: Optional[list[types.AnalysisConfigOrDict]] = None, ) -> types.EvaluationRun: """ Creates an EvaluationRun. @@ -1172,6 +1187,7 @@ def _create_evaluation_run( labels=labels, inference_configs=inference_configs, config=config, + analysis_configs=analysis_configs, ) request_url_dict: Optional[dict[str, str]] @@ -2616,6 +2632,7 @@ def create_evaluation_run( labels: Optional[dict[str, str]] = None, loss_analysis_metrics: Optional[list[Union[str, types.MetricOrDict]]] = None, loss_analysis_configs: Optional[list[types.LossAnalysisConfigOrDict]] = None, + red_teaming_config: Optional[types.RedTeamingAnalysisConfigOrDict] = None, config: Optional[types.CreateEvaluationRunConfigOrDict] = None, ) -> types.EvaluationRun: """Creates an EvaluationRun. @@ -2734,6 +2751,9 @@ def create_evaluation_run( loss_analysis_configs=loss_analysis_configs, inference_configs=inference_configs, ) + resolved_analysis_configs = _evals_utils._resolve_red_teaming_config( + red_teaming_config + ) evaluation_config = types.EvaluationRunConfig( output_config=output_config, metrics=resolved_metrics, @@ -2751,6 +2771,7 @@ def create_evaluation_run( data_source=resolved_dataset, evaluation_config=evaluation_config, inference_configs=resolved_inference_configs, + analysis_configs=resolved_analysis_configs, labels=resolved_labels, config=config, ) @@ -3299,6 +3320,7 @@ async def _create_evaluation_run( dict[str, types.EvaluationRunInferenceConfigOrDict] ] = None, config: Optional[types.CreateEvaluationRunConfigOrDict] = None, + analysis_configs: Optional[list[types.AnalysisConfigOrDict]] = None, ) -> types.EvaluationRun: """ Creates an EvaluationRun. @@ -3312,6 +3334,7 @@ async def _create_evaluation_run( labels=labels, inference_configs=inference_configs, config=config, + analysis_configs=analysis_configs, ) request_url_dict: Optional[dict[str, str]] @@ -4395,6 +4418,7 @@ async def create_evaluation_run( inference_configs: Optional[ dict[str, types.EvaluationRunInferenceConfigOrDict] ] = None, + red_teaming_config: Optional[types.RedTeamingAnalysisConfigOrDict] = None, labels: Optional[dict[str, str]] = None, loss_analysis_metrics: Optional[list[Union[str, types.MetricOrDict]]] = None, loss_analysis_configs: Optional[list[types.LossAnalysisConfigOrDict]] = None, @@ -4426,6 +4450,11 @@ async def create_evaluation_run( this will be automatically constructed using `agent_info` and `user_simulator_config`. Example: {"candidate-1": types.EvaluationRunInferenceConfig(model="gemini-2.5-flash")} + red_teaming_config: This field is experimental and may change in future + versions. Optional configuration for automated Agent Red Teaming + analysis. Specifies attack categories and vulnerable tools to + test. When provided, the server runs a red teaming pipeline + instead of standard evaluation metrics. labels: The labels to apply to the evaluation run. loss_analysis_metrics: This field is experimental and may change in future versions. Optional list of metrics to run loss analysis on. The @@ -4511,6 +4540,9 @@ async def create_evaluation_run( loss_analysis_configs=loss_analysis_configs, inference_configs=inference_configs, ) + resolved_analysis_configs = _evals_utils._resolve_red_teaming_config( + red_teaming_config + ) evaluation_config = types.EvaluationRunConfig( output_config=output_config, metrics=resolved_metrics, @@ -4529,6 +4561,7 @@ async def create_evaluation_run( data_source=resolved_dataset, evaluation_config=evaluation_config, inference_configs=resolved_inference_configs, + analysis_configs=resolved_analysis_configs, labels=resolved_labels, config=config, ) diff --git a/vertexai/_genai/types/__init__.py b/vertexai/_genai/types/__init__.py index c749fbd16c..706188be8f 100644 --- a/vertexai/_genai/types/__init__.py +++ b/vertexai/_genai/types/__init__.py @@ -181,6 +181,9 @@ from .common import AggregatedMetricResult from .common import AggregatedMetricResultDict from .common import AggregatedMetricResultOrDict +from .common import AnalysisConfig +from .common import AnalysisConfigDict +from .common import AnalysisConfigOrDict from .common import AppendAgentEngineSessionEventConfig from .common import AppendAgentEngineSessionEventConfigDict from .common import AppendAgentEngineSessionEventConfigOrDict @@ -202,6 +205,9 @@ from .common import AssessDatasetConfig from .common import AssessDatasetConfigDict from .common import AssessDatasetConfigOrDict +from .common import AttackCategoryResult +from .common import AttackCategoryResultDict +from .common import AttackCategoryResultOrDict from .common import BatchPredictionResourceUsageAssessmentConfig from .common import BatchPredictionResourceUsageAssessmentConfigDict from .common import BatchPredictionResourceUsageAssessmentConfigOrDict @@ -1063,6 +1069,12 @@ from .common import ReasoningEngineTrafficConfigTrafficSplitManualTarget from .common import ReasoningEngineTrafficConfigTrafficSplitManualTargetDict from .common import ReasoningEngineTrafficConfigTrafficSplitManualTargetOrDict +from .common import RedTeamingAnalysisConfig +from .common import RedTeamingAnalysisConfigDict +from .common import RedTeamingAnalysisConfigOrDict +from .common import RedTeamingAnalysisResult +from .common import RedTeamingAnalysisResultDict +from .common import RedTeamingAnalysisResultOrDict from .common import ReservationAffinity from .common import ReservationAffinityDict from .common import ReservationAffinityOrDict @@ -1440,6 +1452,9 @@ from .common import VertexBaseConfig from .common import VertexBaseConfigDict from .common import VertexBaseConfigOrDict +from .common import VulnerableTool +from .common import VulnerableToolDict +from .common import VulnerableToolOrDict from .common import WinRateStats from .common import WinRateStatsDict from .common import WinRateStatsOrDict @@ -1577,12 +1592,27 @@ "EvaluationRunInferenceConfig", "EvaluationRunInferenceConfigDict", "EvaluationRunInferenceConfigOrDict", + "VulnerableTool", + "VulnerableToolDict", + "VulnerableToolOrDict", + "RedTeamingAnalysisConfig", + "RedTeamingAnalysisConfigDict", + "RedTeamingAnalysisConfigOrDict", + "AnalysisConfig", + "AnalysisConfigDict", + "AnalysisConfigOrDict", "CreateEvaluationRunConfig", "CreateEvaluationRunConfigDict", "CreateEvaluationRunConfigOrDict", "SummaryMetric", "SummaryMetricDict", "SummaryMetricOrDict", + "AttackCategoryResult", + "AttackCategoryResultDict", + "AttackCategoryResultOrDict", + "RedTeamingAnalysisResult", + "RedTeamingAnalysisResultDict", + "RedTeamingAnalysisResultOrDict", "LossTaxonomyEntry", "LossTaxonomyEntryDict", "LossTaxonomyEntryOrDict", diff --git a/vertexai/_genai/types/common.py b/vertexai/_genai/types/common.py index 4eb672e852..73210fe590 100644 --- a/vertexai/_genai/types/common.py +++ b/vertexai/_genai/types/common.py @@ -2579,6 +2579,85 @@ class EvaluationRunInferenceConfigDict(TypedDict, total=False): ] +class VulnerableTool(_common.BaseModel): + """A tool considered high risk for prompt injection.""" + + tool_name: Optional[str] = Field( + default=None, + description="""Optional. The name of the vulnerable function/tool (e.g., "search_flights").""", + ) + json_paths: Optional[list[str]] = Field( + default=None, + description="""Optional. JSON Paths within the tool's FunctionResponse where malicious content could be injected.""", + ) + + +class VulnerableToolDict(TypedDict, total=False): + """A tool considered high risk for prompt injection.""" + + tool_name: Optional[str] + """Optional. The name of the vulnerable function/tool (e.g., "search_flights").""" + + json_paths: Optional[list[str]] + """Optional. JSON Paths within the tool's FunctionResponse where malicious content could be injected.""" + + +VulnerableToolOrDict = Union[VulnerableTool, VulnerableToolDict] + + +class RedTeamingAnalysisConfig(_common.BaseModel): + """Configuration for the automated Agent Red Teaming analysis.""" + + attack_categories: Optional[list[str]] = Field( + default=None, + description="""Optional. Specific attack categories to test against.""", + ) + vulnerable_tools: Optional[list[VulnerableTool]] = Field( + default=None, + description="""Optional. Manually defined vulnerable tools and their injection paths.""", + ) + + +class RedTeamingAnalysisConfigDict(TypedDict, total=False): + """Configuration for the automated Agent Red Teaming analysis.""" + + attack_categories: Optional[list[str]] + """Optional. Specific attack categories to test against.""" + + vulnerable_tools: Optional[list[VulnerableToolDict]] + """Optional. Manually defined vulnerable tools and their injection paths.""" + + +RedTeamingAnalysisConfigOrDict = Union[ + RedTeamingAnalysisConfig, RedTeamingAnalysisConfigDict +] + + +class AnalysisConfig(_common.BaseModel): + """Configuration for an analysis to be performed on an evaluation run.""" + + analysis_name: Optional[str] = Field( + default=None, description="""Optional. A name for this analysis.""" + ) + red_teaming_analysis_config: Optional[RedTeamingAnalysisConfig] = Field( + default=None, + description="""Configuration for the automated Agent Red Teaming analysis.""", + ) + + +class AnalysisConfigDict(TypedDict, total=False): + """Configuration for an analysis to be performed on an evaluation run.""" + + analysis_name: Optional[str] + """Optional. A name for this analysis.""" + + red_teaming_analysis_config: Optional[RedTeamingAnalysisConfigDict] + """Configuration for the automated Agent Red Teaming analysis.""" + + +AnalysisConfigOrDict = Union[AnalysisConfig, AnalysisConfigDict] + + class CreateEvaluationRunConfig(_common.BaseModel): """Config to create an evaluation run.""" @@ -2634,6 +2713,9 @@ class _CreateEvaluationRunParameters(_common.BaseModel): config: Optional[CreateEvaluationRunConfig] = Field( default=None, description="""""" ) + analysis_configs: Optional[list[AnalysisConfig]] = Field( + default=None, description="""""" + ) class _CreateEvaluationRunParametersDict(TypedDict, total=False): @@ -2660,6 +2742,9 @@ class _CreateEvaluationRunParametersDict(TypedDict, total=False): config: Optional[CreateEvaluationRunConfigDict] """""" + analysis_configs: Optional[list[AnalysisConfigDict]] + """""" + _CreateEvaluationRunParametersOrDict = Union[ _CreateEvaluationRunParameters, _CreateEvaluationRunParametersDict @@ -2696,6 +2781,70 @@ class SummaryMetricDict(TypedDict, total=False): SummaryMetricOrDict = Union[SummaryMetric, SummaryMetricDict] +class AttackCategoryResult(_common.BaseModel): + """The red teaming outcome for a specific attack category.""" + + attack_category: Optional[str] = Field( + default=None, description="""The category of the attack evaluated.""" + ) + attack_success_rate: Optional[float] = Field( + default=None, + description="""The ratio of successful attacks given a fixed budget.""", + ) + vulnerability_insight: Optional[str] = Field( + default=None, description="""Insights into why an attack succeeded or failed.""" + ) + + +class AttackCategoryResultDict(TypedDict, total=False): + """The red teaming outcome for a specific attack category.""" + + attack_category: Optional[str] + """The category of the attack evaluated.""" + + attack_success_rate: Optional[float] + """The ratio of successful attacks given a fixed budget.""" + + vulnerability_insight: Optional[str] + """Insights into why an attack succeeded or failed.""" + + +AttackCategoryResultOrDict = Union[AttackCategoryResult, AttackCategoryResultDict] + + +class RedTeamingAnalysisResult(_common.BaseModel): + """The top-level result for Red Teaming analysis.""" + + config: Optional[RedTeamingAnalysisConfig] = Field( + default=None, + description="""The configuration used to generate this analysis.""", + ) + analysis_time: Optional[str] = Field( + default=None, description="""The timestamp when this analysis was performed.""" + ) + category_results: Optional[list[AttackCategoryResult]] = Field( + default=None, description="""Detailed results by attack category.""" + ) + + +class RedTeamingAnalysisResultDict(TypedDict, total=False): + """The top-level result for Red Teaming analysis.""" + + config: Optional[RedTeamingAnalysisConfigDict] + """The configuration used to generate this analysis.""" + + analysis_time: Optional[str] + """The timestamp when this analysis was performed.""" + + category_results: Optional[list[AttackCategoryResultDict]] + """Detailed results by attack category.""" + + +RedTeamingAnalysisResultOrDict = Union[ + RedTeamingAnalysisResult, RedTeamingAnalysisResultDict +] + + class LossTaxonomyEntry(_common.BaseModel): """A specific entry in the loss pattern taxonomy.""" @@ -2879,6 +3028,9 @@ class EvaluationRunResults(_common.BaseModel): default=None, description="""The loss analysis results for the evaluation run.""", ) + red_teaming_analysis_results: Optional[list[RedTeamingAnalysisResult]] = Field( + default=None, description="""The Red Teaming analysis results.""" + ) class EvaluationRunResultsDict(TypedDict, total=False): @@ -2893,6 +3045,9 @@ class EvaluationRunResultsDict(TypedDict, total=False): loss_analysis_results: Optional[list[LossAnalysisResultDict]] """The loss analysis results for the evaluation run.""" + red_teaming_analysis_results: Optional[list[RedTeamingAnalysisResultDict]] + """The Red Teaming analysis results.""" + EvaluationRunResultsOrDict = Union[EvaluationRunResults, EvaluationRunResultsDict] @@ -3437,6 +3592,10 @@ class EvaluationRun(_common.BaseModel): description="""This field is experimental and may change in future versions. The inference configs for the evaluation run.""", ) labels: Optional[dict[str, str]] = Field(default=None, description="""""") + analysis_configs: Optional[list[AnalysisConfig]] = Field( + default=None, + description="""The analysis configurations for the evaluation run.""", + ) # TODO(b/448806531): Remove all the overridden _from_response methods once the # ticket is resolved and published. @@ -3537,6 +3696,9 @@ class EvaluationRunDict(TypedDict, total=False): labels: Optional[dict[str, str]] """""" + analysis_configs: Optional[list[AnalysisConfigDict]] + """The analysis configurations for the evaluation run.""" + EvaluationRunOrDict = Union[EvaluationRun, EvaluationRunDict]