Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 88 additions & 3 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def test_response_structure(self):
assert result.clusters[1].cluster_id == "cluster-2"

def test_get_loss_analysis_html(self):
"""Tests that _get_loss_analysis_html generates valid HTML with data."""
"""Tests that get_loss_analysis_html generates valid HTML with data."""
from vertexai._genai import _evals_visualization
import json

Expand Down Expand Up @@ -666,7 +666,7 @@ def test_get_loss_analysis_html(self):
}
]
}
html = _evals_visualization._get_loss_analysis_html(json.dumps(data))
html = _evals_visualization.get_loss_analysis_html(json.dumps(data))
assert "Loss Pattern Analysis" in html
assert "test_metric" not in html # data is Base64-encoded in the HTML
assert "<!DOCTYPE html>" in html
Expand All @@ -676,6 +676,91 @@ def test_get_loss_analysis_html(self):
assert "example-section-label" in html # labels for scenario/rubrics
assert "Analysis Summary" in html # summary heading

def test_get_evaluation_html(self):
    """Tests that get_evaluation_html generates valid HTML with data."""
    from vertexai._genai import _evals_visualization
    import base64
    import json

    payload = {
        "summary_metrics": [{"metric_name": "test_metric", "mean_score": 0.85}],
        "eval_case_results": [
            {
                "eval_case_index": 0,
                "response_candidate_results": [
                    {"display_text": "candidate response"}
                ],
            }
        ],
        "metadata": {"dataset": []},
    }
    serialized = json.dumps(payload)
    rendered = _evals_visualization.get_evaluation_html(serialized)

    # The payload is embedded Base64-encoded and decoded client-side, so the
    # raw metric name must never appear verbatim in the generated HTML.
    expected_b64 = base64.b64encode(serialized.encode("utf-8")).decode("ascii")
    assert "<!DOCTYPE html>" in rendered
    assert "<title>Evaluation Report</title>" in rendered
    assert "test_metric" not in rendered
    assert expected_b64 in rendered
    assert "DOMPurify" in rendered

def test_get_comparison_html(self):
    """Tests that get_comparison_html generates valid HTML with data."""
    from vertexai._genai import _evals_visualization
    import base64
    import json

    payload = {
        "summary_metrics": [
            {
                "metric_name": "test_metric",
                "win_rate": 0.6,
                "loss_rate": 0.4,
            }
        ],
        "eval_case_results": [
            {
                "eval_case_index": 0,
                "response_candidate_results": [
                    {"display_text": "candidate A"},
                    {"display_text": "candidate B"},
                ],
            }
        ],
        "metadata": {"dataset": []},
    }
    serialized = json.dumps(payload)
    rendered = _evals_visualization.get_comparison_html(serialized)

    # Comparison report is self-contained HTML; the data travels as a
    # Base64 blob, so the raw metric name must not leak into the markup.
    expected_b64 = base64.b64encode(serialized.encode("utf-8")).decode("ascii")
    assert "<!DOCTYPE html>" in rendered
    assert "<title>Eval Comparison Report</title>" in rendered
    assert "test_metric" not in rendered
    assert expected_b64 in rendered
    assert "DOMPurify" in rendered

def test_get_inference_html(self):
    """Tests that get_inference_html generates valid HTML with data."""
    from vertexai._genai import _evals_visualization
    import base64
    import json

    rows = [
        {
            "prompt": "What is the capital of France?",
            "response": "Paris",
        }
    ]
    serialized = json.dumps(rows, ensure_ascii=False)
    rendered = _evals_visualization.get_inference_html(serialized)

    # The dataset rows ride along only as a Base64-encoded payload, so the
    # raw response text must be absent from the HTML itself.
    expected_b64 = base64.b64encode(serialized.encode("utf-8")).decode("ascii")
    assert "<!DOCTYPE html>" in rendered
    assert "<title>Evaluation Dataset</title>" in rendered
    assert "Paris" not in rendered
    assert expected_b64 in rendered
    assert "DOMPurify" in rendered

def test_display_loss_clusters_response_no_ipython(self):
"""Tests graceful fallback when not in IPython."""
from vertexai._genai import _evals_visualization
Expand Down Expand Up @@ -1488,7 +1573,7 @@ def test_display_loss_analysis_results_html(self):
},
ensure_ascii=False,
)
html = _evals_visualization._get_loss_analysis_html(payload_json)
html = _evals_visualization.get_loss_analysis_html(payload_json)
# The HTML is a self-contained report with base64-encoded JSON payload
# decoded by JavaScript at runtime. Verify structure, not content.
assert "<!DOCTYPE html>" in html
Expand Down
31 changes: 20 additions & 11 deletions vertexai/_genai/_evals_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def _extract_dataset_rows(dataset: types.EvaluationDataset) -> list[dict[str, An
return processed_rows


def _get_evaluation_html(eval_result_json: str) -> str:
def get_evaluation_html(eval_result_json: str) -> str:
"""Returns a self-contained HTML for single evaluation visualization."""
payload_b64 = _encode_to_base64(eval_result_json)
return textwrap.dedent(
Expand Down Expand Up @@ -787,7 +787,7 @@ def _get_evaluation_html(eval_result_json: str) -> str:
)


def _get_comparison_html(eval_result_json: str) -> str:
def get_comparison_html(eval_result_json: str) -> str:
"""Returns a self-contained HTML for a side-by-side eval comparison."""
payload_b64 = _encode_to_base64(eval_result_json)
return textwrap.dedent(
Expand Down Expand Up @@ -1277,7 +1277,7 @@ def _get_comparison_html(eval_result_json: str) -> str:
)


def _get_inference_html(dataframe_json: str) -> str:
def get_inference_html(dataframe_json: str) -> str:
"""Returns a self-contained HTML for displaying inference results."""
payload_b64 = _encode_to_base64(dataframe_json)
return textwrap.dedent(
Expand Down Expand Up @@ -1475,7 +1475,7 @@ def display_evaluation_result(
summary.update(win_rates[summary["metric_name"]])

result_dump["metadata"] = metadata_payload
html_content = _get_comparison_html(json.dumps(result_dump))
html_content = get_comparison_html(json.dumps(result_dump))
else:
single_dataset = input_dataset_list[0] if input_dataset_list else None
processed_rows = []
Expand All @@ -1499,7 +1499,7 @@ def display_evaluation_result(
cand_res["raw_json"] = original_case["response_raw_json"]

result_dump["metadata"] = metadata_payload
html_content = _get_evaluation_html(json.dumps(result_dump))
html_content = get_evaluation_html(json.dumps(result_dump))

display.display(display.HTML(html_content))

Expand Down Expand Up @@ -1553,11 +1553,11 @@ def display_evaluation_dataset(eval_dataset_obj: types.EvaluationDataset) -> Non
processed_rows.append(processed_row)

dataframe_json_string = json.dumps(processed_rows, ensure_ascii=False, default=str)
html_content = _get_inference_html(dataframe_json_string)
html_content = get_inference_html(dataframe_json_string)
display.display(display.HTML(html_content))


def _get_loss_analysis_html(loss_analysis_json: str) -> str:
def get_loss_analysis_html(loss_analysis_json: str) -> str:
"""Returns self-contained HTML for loss pattern analysis visualization."""
payload_b64 = _encode_to_base64(loss_analysis_json)
return textwrap.dedent(
Expand Down Expand Up @@ -1865,7 +1865,7 @@ def display_loss_clusters_response(
)
raise

html_content = _get_loss_analysis_html(
html_content = get_loss_analysis_html(
json.dumps(result_dump, ensure_ascii=False, default=_pydantic_serializer)
)
display.display(display.HTML(html_content))
Expand All @@ -1892,7 +1892,7 @@ def display_loss_analysis_result(
)
raise

html_content = _get_loss_analysis_html(
html_content = get_loss_analysis_html(
json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer)
)
display.display(display.HTML(html_content))
Expand Down Expand Up @@ -1968,7 +1968,7 @@ def display_loss_analysis_results(

Wraps the list of LossAnalysisResult objects into the same JSON
structure used by GenerateLossClustersResponse and renders using
the shared _get_loss_analysis_html() function.
the shared get_loss_analysis_html() function.

When ``eval_item_map`` is provided (from
``get_evaluation_run(include_evaluation_items=True)``), the examples
Expand Down Expand Up @@ -1997,7 +1997,7 @@ def display_loss_analysis_results(
)
raise

html_content = _get_loss_analysis_html(
html_content = get_loss_analysis_html(
json.dumps(wrapped, ensure_ascii=False, default=_pydantic_serializer)
)
display.display(display.HTML(html_content))
Expand All @@ -2015,3 +2015,12 @@ def display_evaluation_run_status(eval_run_obj: "types.EvaluationRun") -> None:
error_message = str(eval_run_obj.error) if eval_run_obj.error else None
html_content = _get_status_html(status, error_message)
display.display(display.HTML(html_content))


# Backward-compatible private aliases for the public HTML generators.
# These are kept temporarily to avoid breaking existing callers that depend on
# the previous private names. New code should use the public names above.
# TODO: remove these aliases once all internal callers migrate to the
# public names.
_get_evaluation_html = get_evaluation_html
_get_comparison_html = get_comparison_html
_get_inference_html = get_inference_html
_get_loss_analysis_html = get_loss_analysis_html
Loading