diff --git a/manifests/python.yml b/manifests/python.yml
index ccdf8e9da1f..c404a5c8cea 100644
--- a/manifests/python.yml
+++ b/manifests/python.yml
@@ -1089,7 +1089,7 @@ manifest:
   tests/ffe/test_dynamic_evaluation.py::Test_FFE_RC_Down_From_Start: v4.0.0
   tests/ffe/test_dynamic_evaluation.py::Test_FFE_RC_Unavailable: flaky (FFL-1622)
   tests/ffe/test_exposures.py: v4.2.0-dev
-  tests/ffe/test_flag_eval_metrics.py: missing_feature
+  tests/ffe/test_flag_eval_metrics.py: v4.7.0-dev
   tests/integration_frameworks/llm/anthropic/test_anthropic_apm.py::TestAnthropicApmMessages: v3.16.0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages: v3.16.0
   tests/integration_frameworks/llm/anthropic/test_anthropic_llmobs.py::TestAnthropicLlmObsMessages::test_create: missing_feature  # ephemeral cache TTL metrics not yet released
diff --git a/tests/ffe/test_flag_eval_metrics.py b/tests/ffe/test_flag_eval_metrics.py
index c0116d8564a..f5856862014 100644
--- a/tests/ffe/test_flag_eval_metrics.py
+++ b/tests/ffe/test_flag_eval_metrics.py
@@ -6,6 +6,8 @@
     scenarios,
     features,
     remote_config as rc,
+    context,
+    irrelevant,
 )
 
 
@@ -15,9 +17,11 @@
 
 def make_ufc_fixture(flag_key: str, variant_key: str = "on", variation_type: str = "STRING", *, enabled: bool = True):
     """Create a UFC fixture with the given flag configuration."""
-    values: dict[str, dict[str, str | bool]] = {
+    values: dict[str, dict[str, str | bool | float | int]] = {
         "STRING": {"on": "on-value", "off": "off-value"},
         "BOOLEAN": {"on": True, "off": False},
+        "NUMERIC": {"on": 1.5, "off": 0.0},  # Decimal value for type_mismatch testing (NUMERIC→INTEGER)
+        "INTEGER": {"on": 42, "off": 0},
     }
     var_values = values[variation_type]
 
@@ -77,7 +81,7 @@ def get_tag_value(tags: list[str], key: str):
 
 
 @scenarios.feature_flagging_and_experimentation
-@features.feature_flags_exposures
+@features.feature_flags_eval_metrics
 class Test_FFE_Eval_Metric_Basic:
     """Test that a flag evaluation produces a feature_flag.evaluations metric."""
 
@@ -128,7 +132,7 @@ def test_ffe_eval_metric_basic(self):
 
 
 @scenarios.feature_flagging_and_experimentation
-@features.feature_flags_exposures
+@features.feature_flags_eval_metrics
 class Test_FFE_Eval_Metric_Count:
     """Test that multiple evaluations of the same flag produce correct metric count."""
 
@@ -179,7 +183,7 @@ def test_ffe_eval_metric_count(self):
 
 
 @scenarios.feature_flagging_and_experimentation
-@features.feature_flags_exposures
+@features.feature_flags_eval_metrics
 class Test_FFE_Eval_Metric_Different_Flags:
     """Test that different flags produce separate metric series."""
 
@@ -267,12 +271,351 @@ def test_ffe_eval_metric_different_flags(self):
         assert len(metrics_b) > 0, f"Expected metric for flag '{self.flag_b}', found none. All: {find_eval_metrics()}"
 
 
+# =============================================================================
+# Reason Tests
+#
+# OpenFeature defines several resolution reasons. Test coverage:
+#
+#   Reason           | Test                              | Scenario
+#   -----------------|-----------------------------------|----------------------------------
+#   STATIC           | Test_FFE_Eval_Metric_Basic        | No rules, no shards (catch-all)
+#   TARGETING_MATCH  | Test_FFE_Eval_Reason_Targeting    | Rules match the context
+#   SPLIT            | Test_FFE_Eval_Reason_Split        | Shards determine variant
+#   DEFAULT          | Test_FFE_Eval_Reason_Default      | Rules don't match, fallback used
+#   DISABLED         | Test_FFE_Eval_Reason_Disabled     | Flag is disabled
+#
+# =============================================================================
+
+
+def make_targeting_fixture(flag_key: str, attribute: str, match_value: str):
+    """Create a UFC fixture with a targeting rule."""
+    return {
+        "createdAt": "2024-04-17T19:40:53.716Z",
+        "format": "SERVER",
+        "environment": {"name": "Test"},
+        "flags": {
+            flag_key: {
+                "key": flag_key,
+                "enabled": True,
+                "variationType": "STRING",
+                "variations": {
+                    "on": {"key": "on", "value": "on-value"},
+                    "off": {"key": "off", "value": "off-value"},
+                },
+                "allocations": [
+                    {
+                        "key": "targeted-allocation",
+                        "rules": [
+                            {
+                                "conditions": [
+                                    {
+                                        "operator": "ONE_OF",
+                                        "attribute": attribute,
+                                        "value": [match_value],
+                                    }
+                                ]
+                            }
+                        ],
+                        "splits": [{"variationKey": "on", "shards": []}],
+                        "doLog": True,
+                    }
+                ],
+            }
+        },
+    }
+
+
+def make_split_fixture(flag_key: str):
+    """Create a UFC fixture with shards for percentage-based rollout (50/50 split)."""
+    return {
+        "createdAt": "2024-04-17T19:40:53.716Z",
+        "format": "SERVER",
+        "environment": {"name": "Test"},
+        "flags": {
+            flag_key: {
+                "key": flag_key,
+                "enabled": True,
+                "variationType": "STRING",
+                "variations": {
+                    "on": {"key": "on", "value": "on-value"},
+                    "off": {"key": "off", "value": "off-value"},
+                },
+                "allocations": [
+                    {
+                        "key": "split-allocation",
+                        "rules": [],
+                        "splits": [
+                            {
+                                "variationKey": "on",
+                                "shards": [
+                                    {
+                                        "salt": "test-salt",
+                                        "totalShards": 10000,
+                                        "ranges": [{"start": 0, "end": 5000}],
+                                    }
+                                ],
+                            },
+                            {
+                                "variationKey": "off",
+                                "shards": [
+                                    {
+                                        "salt": "test-salt",
+                                        "totalShards": 10000,
+                                        "ranges": [{"start": 5000, "end": 10000}],
+                                    }
+                                ],
+                            },
+                        ],
+                        "doLog": True,
+                    }
+                ],
+            }
+        },
+    }
+
+
+def make_invalid_regex_fixture(flag_key: str, invalid_regex: str = "[invalid"):
+    """Create a UFC fixture with an invalid regex pattern in a MATCHES condition.
+
+    This tests the PARSE_ERROR scenario where the configuration contains
+    a syntactically invalid regex pattern that fails during evaluation.
+    """
+    return {
+        "createdAt": "2024-04-17T19:40:53.716Z",
+        "format": "SERVER",
+        "environment": {"name": "Test"},
+        "flags": {
+            flag_key: {
+                "key": flag_key,
+                "enabled": True,
+                "variationType": "STRING",
+                "variations": {
+                    "on": {"key": "on", "value": "on-value"},
+                    "off": {"key": "off", "value": "off-value"},
+                },
+                "allocations": [
+                    {
+                        "key": "regex-allocation",
+                        "rules": [
+                            {
+                                "conditions": [
+                                    {
+                                        "operator": "MATCHES",
+                                        "attribute": "email",
+                                        "value": invalid_regex,  # Invalid regex pattern
+                                    }
+                                ]
+                            }
+                        ],
+                        "splits": [{"variationKey": "on", "shards": []}],
+                        "doLog": True,
+                    }
+                ],
+            }
+        },
+    }
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Reason_Targeting:
+    """Test that matching targeting rules produce reason=targeting_match."""
+
+    def setup_ffe_eval_reason_targeting(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-reason-targeting"
+        self.flag_key = "reason-targeting-flag"
+        rc.tracer_rc_state.set_config(
+            f"{RC_PATH}/{config_id}/config", make_targeting_fixture(self.flag_key, "tier", "premium")
+        ).apply()
+
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {"tier": "premium"},  # Matches the targeting rule
+            },
+        )
+
+    def test_ffe_eval_reason_targeting(self):
+        """Test that targeting rule match produces reason=targeting_match."""
+        assert self.r.status_code == 200, f"Flag evaluation failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "targeting_match", (
+            f"Expected reason 'targeting_match', got tags: {tags}"
+        )
+        assert get_tag_value(tags, "feature_flag.result.variant") == "on", f"Expected variant 'on', got tags: {tags}"
+
+
 @scenarios.feature_flagging_and_experimentation
-@features.feature_flags_exposures
-class Test_FFE_Eval_Metric_Error:
-    """Test that evaluating a non-existent flag produces metric with error tags."""
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Reason_Split:
+    """Test that shard-based allocation produces reason=split."""
 
-    def setup_ffe_eval_metric_error(self):
+    def setup_ffe_eval_reason_split(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-reason-split"
+        self.flag_key = "reason-split-flag"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_split_fixture(self.flag_key)).apply()
+
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_reason_split(self):
+        """Test that shard-based evaluation produces reason=split."""
+        assert self.r.status_code == 200, f"Flag evaluation failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "split", (
+            f"Expected reason 'split', got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Reason_Default:
+    """Test that unmatched targeting rules produce reason=default."""
+
+    def setup_ffe_eval_reason_default(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-reason-default"
+        self.flag_key = "reason-default-flag"
+        # Flag requires tier=premium, but we'll send tier=basic
+        rc.tracer_rc_state.set_config(
+            f"{RC_PATH}/{config_id}/config", make_targeting_fixture(self.flag_key, "tier", "premium")
+        ).apply()
+
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {"tier": "basic"},  # Does NOT match the targeting rule
+            },
+        )
+
+    def test_ffe_eval_reason_default(self):
+        """Test that unmatched rules produce reason=default."""
+        assert self.r.status_code == 200, f"Flag evaluation failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "default", (
+            f"Expected reason 'default', got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Reason_Disabled:
+    """Test that a disabled flag produces reason=disabled."""
+
+    def setup_ffe_eval_reason_disabled(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-reason-disabled"
+        self.flag_key = "reason-disabled-flag"
+        rc.tracer_rc_state.set_config(
+            f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key, enabled=False)
+        ).apply()
+
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_reason_disabled(self):
+        """Test that disabled flag produces reason=disabled."""
+        assert self.r.status_code == 200, f"Flag evaluation failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "disabled", (
+            f"Expected reason 'disabled', got tags: {tags}"
+        )
+
+
+# =============================================================================
+# Error Code Tests
+#
+# OpenFeature defines 8 error codes. Test coverage:
+#
+#   Error Code             | Test
+#   -----------------------|---------------------------------------------
+#   FLAG_NOT_FOUND         | Test_FFE_Eval_Config_Exists_Flag_Missing
+#   TYPE_MISMATCH          | Test_FFE_Eval_Metric_Type_Mismatch, Test_FFE_Eval_Metric_Numeric_To_Integer
+#   PARSE_ERROR            | Test_FFE_Eval_Metric_Parse_Error
+#   GENERAL                | (not tested - catch-all error code)
+#   TARGETING_KEY_MISSING  | Test_FFE_Eval_Targeting_Key_Optional (verifies it's NOT returned; JS excluded)
+#   INVALID_CONTEXT        | Test_FFE_Eval_Invalid_Context_Nested_Attribute (Python only)
+#   PROVIDER_NOT_READY     | Test_FFE_Eval_No_Config_Loaded
+#   PROVIDER_FATAL         | (not tested - requires fatal provider error)
+#
+# INVALID_CONTEXT behavioral differences:
+#   - Python: Returns for nested dict/list attributes (PyO3 conversion failure)
+#   - Go: Flattens nested objects to dot notation instead
+#   - Ruby: Silently skips unsupported attribute types
+#   - Java: Returns only for null context, not nested attributes
+#   - .NET: Relies on native library; not yet standardized
+#   - JS: Does not use INVALID_CONTEXT at all
+#
+# =============================================================================
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Config_Exists_Flag_Missing:
+    """Test error metrics when config exists but requested flag is missing.
+
+    This is distinct from Test_FFE_Eval_No_Config_Loaded:
+    - Here: Config IS loaded, but the specific flag doesn't exist → error.type=flag_not_found
+    - There: No config loaded at all → error.type=provider_not_ready
+
+    Both should return reason=error, but with different error.type values.
+    """
+
+    def setup_ffe_eval_config_exists_flag_missing(self):
         rc.tracer_rc_state.reset().apply()
 
         # Set up config with a different flag than what we'll request
@@ -291,8 +634,8 @@ def setup_ffe_eval_metric_error(self):
             },
         )
 
-    def test_ffe_eval_metric_error(self):
-        """Test that error evaluations produce metric with error.type tag."""
+    def test_ffe_eval_config_exists_flag_missing(self):
+        """Test that missing flag (with config loaded) produces error.type=flag_not_found."""
         assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
 
         metrics = find_eval_metrics(self.flag_key)
@@ -312,7 +655,7 @@ def test_ffe_eval_metric_error(self):
 
 
 @scenarios.feature_flagging_and_experimentation
-@features.feature_flags_exposures
+@features.feature_flags_eval_metrics
 class Test_FFE_Eval_Metric_Type_Mismatch:
     """Test that requesting the wrong type produces a metric with type_mismatch error.
 
@@ -361,3 +704,369 @@ def test_ffe_eval_metric_type_mismatch(self):
         assert get_tag_value(tags, "error.type") == "type_mismatch", (
             f"Expected error.type 'type_mismatch', got tags: {tags}"
         )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Metric_Numeric_To_Integer:
+    """Test that evaluating a NUMERIC flag as INTEGER produces type_mismatch error.
+
+    This configures a NUMERIC flag with a decimal value (1.5) but evaluates it as INTEGER.
+    Since NUMERIC and INTEGER are different types, this produces a type_mismatch error.
+    """
+
+    def setup_ffe_eval_metric_numeric_to_integer(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-eval-metric-numeric-to-int"
+        self.flag_key = "eval-metric-numeric-to-int-flag"
+        rc.tracer_rc_state.set_config(
+            f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key, variation_type="NUMERIC")
+        ).apply()
+
+        # Evaluate NUMERIC flag as INTEGER → type mismatch
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "INTEGER",
+                "defaultValue": 0,
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_metric_numeric_to_integer(self):
+        """Test that NUMERIC-to-INTEGER evaluation produces error.type:type_mismatch."""
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "error", (
+            f"Expected reason 'error' for type mismatch, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "error.type") == "type_mismatch", (
+            f"Expected error.type 'type_mismatch', got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+@irrelevant(
+    context.library == "golang",
+    reason="Go validates regex at config load time and rejects invalid patterns upfront",
+)
+class Test_FFE_Eval_Metric_Parse_Error:
+    """Test that an invalid regex pattern produces error.type=parse_error.
+
+    This configures a flag with a MATCHES condition containing an invalid regex pattern
+    (e.g., "[invalid" which has an unclosed bracket). When the condition is evaluated,
+    the regex compilation fails and produces a parse_error.
+
+    Behavioral differences across SDKs:
+    - Python (libdatadog): Returns parse_error during evaluation
+    - Go: Validates regex at config load time, rejects config with invalid regex
+    """
+
+    def setup_ffe_eval_metric_parse_error(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-eval-metric-parse-error"
+        self.flag_key = "eval-metric-parse-error-flag"
+        rc.tracer_rc_state.set_config(
+            f"{RC_PATH}/{config_id}/config", make_invalid_regex_fixture(self.flag_key)
+        ).apply()
+
+        # Evaluate the flag with an attribute that triggers the invalid regex condition
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {"email": "test@example.com"},  # Triggers MATCHES condition
+            },
+        )
+
+    def test_ffe_eval_metric_parse_error(self):
+        """Test that invalid regex produces error.type:parse_error."""
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "error", (
+            f"Expected reason 'error' for parse error, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "error.type") == "parse_error", (
+            f"Expected error.type 'parse_error', got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_No_Config_Loaded:
+    """Test that evaluating a flag when no configuration is loaded produces error metrics.
+
+    When no FFE configuration has been loaded, tracers should return:
+    - feature_flag.result.reason = "error"
+    - error.type = "provider_not_ready"
+    """
+
+    def setup_ffe_eval_no_config_loaded(self):
+        # Reset RC state and do NOT load any configuration
+        rc.tracer_rc_state.reset().apply()
+
+        # Evaluate a flag without any config loaded
+        self.flag_key = "no-config-flag"
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_no_config_loaded(self):
+        """Test that no config loaded produces reason=error and error.type=provider_not_ready.
+
+        This ensures cross-tracer consistency for the 'no config loaded' scenario.
+        """
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, (
+            f"Expected metric for flag '{self.flag_key}' with no config, found none. All: {find_eval_metrics()}"
+        )
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "error", (
+            f"Expected reason 'error' when no config loaded, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "error.type") == "provider_not_ready", (
+            f"Expected error.type 'provider_not_ready' when no config loaded, got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+@irrelevant(
+    context.library == "nodejs", reason="JS SDK requires targeting key and returns TARGETING_KEY_MISSING when missing"
+)
+class Test_FFE_Eval_Targeting_Key_Optional:
+    """Test that flag evaluation succeeds without a targeting key.
+
+    The OpenFeature spec defines TARGETING_KEY_MISSING as an error code, but the
+    targeting key is optional in the spec. Most Datadog providers do not require
+    a targeting key for flag evaluation:
+
+    - Evaluations without sharding work without a targeting key
+    - Only shard-based allocations need a targeting key (and they simply won't match)
+
+    Behavioral differences:
+    - Python, Go, Ruby, Java, .NET: Targeting key is optional, evaluation succeeds
+    - JS: Requires targeting key, returns TARGETING_KEY_MISSING error
+
+    This test verifies that evaluation succeeds without a targeting key for most SDKs.
+    """
+
+    def setup_ffe_eval_targeting_key_optional(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-targeting-key-optional"
+        self.flag_key = "targeting-key-optional-flag"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key)).apply()
+
+        # Evaluate with an empty targeting key - should still succeed for most SDKs
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "",  # Empty targeting key
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_eval_targeting_key_optional(self):
+        """Test that evaluation succeeds without targeting key (no TARGETING_KEY_MISSING error)."""
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none. All: {find_eval_metrics()}"
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        # Should NOT be an error - targeting key is optional
+        reason = get_tag_value(tags, "feature_flag.result.reason")
+        assert reason != "error", f"Expected successful evaluation without targeting key, but got error. Tags: {tags}"
+
+        # Should not have TARGETING_KEY_MISSING error
+        error_type = get_tag_value(tags, "error.type")
+        assert error_type != "targeting_key_missing", (
+            f"Got TARGETING_KEY_MISSING error but targeting key should be optional. Tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+@irrelevant(
+    context.library == "golang",
+    reason="Go flattens nested attributes to dot notation instead of returning INVALID_CONTEXT",
+)
+@irrelevant(
+    context.library == "ruby",
+    reason="Ruby silently skips unsupported attribute types instead of returning INVALID_CONTEXT",
+)
+@irrelevant(
+    context.library == "java", reason="Java uses INVALID_CONTEXT only for null context, not for nested attributes"
+)
+@irrelevant(
+    context.library == "dotnet", reason=".NET INVALID_CONTEXT behavior for nested attributes is not yet standardized"
+)
+@irrelevant(context.library == "nodejs", reason="JS SDK does not use INVALID_CONTEXT error code")
+class Test_FFE_Eval_Invalid_Context_Nested_Attribute:
+    """Test that nested/unsupported attribute types produce error.type=invalid_context.
+
+    The datadog-ffe native library (used by Python) only supports primitive attribute types:
+    str, int, float, bool, and None. Nested objects (dicts) and lists are NOT
+    supported and will trigger an INVALID_CONTEXT error.
+
+    Behavioral differences across SDKs for nested attributes:
+    - Python: Returns INVALID_CONTEXT (PyO3 conversion failure)
+    - Go: Flattens to dot notation (e.g., {"a": {"b": 1}} → {"a.b": 1})
+    - Ruby: Silently skips unsupported attribute types
+    - Java: Uses INVALID_CONTEXT only for null context, not nested attributes
+    - .NET: Relies on native library; behavior not yet standardized
+    - JS: Does not use INVALID_CONTEXT at all
+
+    This test currently only runs for Python.
+    """
+
+    def setup_ffe_eval_invalid_context_nested_attribute(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-invalid-context"
+        self.flag_key = "invalid-context-flag"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key)).apply()
+
+        # Pass a nested dict as an attribute value - this should trigger INVALID_CONTEXT
+        # The native library only supports: str, int, float, bool, None
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {"nested": {"inner": "value"}},  # Nested dict - not supported
+            },
+        )
+
+    def test_ffe_eval_invalid_context_nested_attribute(self):
+        """Test that nested attribute values produce error.type=invalid_context."""
+        assert self.r.status_code == 200, f"Flag evaluation request failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, (
+            f"Expected metric for flag '{self.flag_key}' with invalid context, found none. All: {find_eval_metrics()}"
+        )
+
+        point = metrics[0]
+        tags = point.get("tags", [])
+
+        assert get_tag_value(tags, "feature_flag.result.reason") == "error", (
+            f"Expected reason 'error' for invalid context, got tags: {tags}"
+        )
+        assert get_tag_value(tags, "error.type") == "invalid_context", (
+            f"Expected error.type 'invalid_context' for nested attribute, got tags: {tags}"
+        )
+
+
+@scenarios.feature_flagging_and_experimentation
+@features.feature_flags_eval_metrics
+class Test_FFE_Eval_Lowercase_Consistency:
+    """Test that reason and error.type metric tag values are lowercase.
+
+    OpenFeature telemetry conventions require lowercase values for reason and error codes.
+    This test ensures tracers emit lowercase values without relying on fallback logic.
+    """
+
+    def setup_ffe_lowercase_reason(self):
+        rc.tracer_rc_state.reset().apply()
+
+        config_id = "ffe-lowercase-test"
+        self.flag_key = "lowercase-test-flag"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture(self.flag_key)).apply()
+
+        self.r = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_lowercase_reason(self):
+        """Test that reason values are lowercase."""
+        assert self.r.status_code == 200, f"Flag evaluation failed: {self.r.text}"
+
+        metrics = find_eval_metrics(self.flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.flag_key}', found none."
+
+        for point in metrics:
+            tags = point.get("tags", [])
+            reason = get_tag_value(tags, "feature_flag.result.reason")
+            if reason:
+                assert reason == reason.lower(), f"Reason '{reason}' is not lowercase. Tags: {tags}"
+
+    def setup_ffe_lowercase_error_type(self):
+        rc.tracer_rc_state.reset().apply()
+
+        # Set up config with a different flag than what we'll request
+        config_id = "ffe-lowercase-error-test"
+        rc.tracer_rc_state.set_config(f"{RC_PATH}/{config_id}/config", make_ufc_fixture("some-other-flag")).apply()
+
+        # Request non-existent flag to trigger error
+        self.error_flag_key = "lowercase-error-flag"
+        self.r_error = weblog.post(
+            "/ffe",
+            json={
+                "flag": self.error_flag_key,
+                "variationType": "STRING",
+                "defaultValue": "default",
+                "targetingKey": "user-1",
+                "attributes": {},
+            },
+        )
+
+    def test_ffe_lowercase_error_type(self):
+        """Test that error.type values are lowercase."""
+        assert self.r_error.status_code == 200, f"Flag evaluation request failed: {self.r_error.text}"
+
+        metrics = find_eval_metrics(self.error_flag_key)
+        assert len(metrics) > 0, f"Expected metric for flag '{self.error_flag_key}', found none."
+
+        for point in metrics:
+            tags = point.get("tags", [])
+            error_type = get_tag_value(tags, "error.type")
+            if error_type:
+                assert error_type == error_type.lower(), f"Error type '{error_type}' is not lowercase. Tags: {tags}"
diff --git a/utils/_context/_scenarios/__init__.py b/utils/_context/_scenarios/__init__.py
index 555e574f30f..db6f4e6c1e0 100644
--- a/utils/_context/_scenarios/__init__.py
+++ b/utils/_context/_scenarios/__init__.py
@@ -544,6 +544,7 @@ class _Scenarios:
             "DD_EXPERIMENTAL_FLAGGING_PROVIDER_ENABLED": "true",
             "DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS": "0.2",
             "DD_METRICS_OTEL_ENABLED": "true",
+            "OTEL_EXPORTER_OTLP_METRICS_PROTOCOL": "http/protobuf",
             "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT": "http://agent:4318/v1/metrics",
         },
         agent_interface_timeout=30,
diff --git a/utils/_features.py b/utils/_features.py
index fde956fd590..521e3d67364 100644
--- a/utils/_features.py
+++ b/utils/_features.py
@@ -2566,6 +2566,14 @@ def feature_flags_exposures(test_object):
         """
         return _mark_test_object(test_object, feature_id=535, owner=_Owner.ffe)
 
+    @staticmethod
+    def feature_flags_eval_metrics(test_object):
+        """Feature Flags Evaluation Metrics: metrics emitted for feature flag evaluations
+
+        https://feature-parity.us1.prod.dog/#/?feature=548
+        """
+        return _mark_test_object(test_object, feature_id=548, owner=_Owner.ffe)
+
     @staticmethod
     def appsec_extended_data_collection(test_object):
         """AppSec supports extended data collection including headers and body
diff --git a/utils/build/docker/python/django-poc.Dockerfile b/utils/build/docker/python/django-poc.Dockerfile
index b1b6ac55883..ad7be4d193a 100644
--- a/utils/build/docker/python/django-poc.Dockerfile
+++ b/utils/build/docker/python/django-poc.Dockerfile
@@ -6,6 +6,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/django /app
 COPY utils/build/docker/python/iast.py /app/iast.py
 
diff --git a/utils/build/docker/python/django-py3.13.Dockerfile b/utils/build/docker/python/django-py3.13.Dockerfile
index 908b03ff9c8..c4f0050d358 100644
--- a/utils/build/docker/python/django-py3.13.Dockerfile
+++ b/utils/build/docker/python/django-py3.13.Dockerfile
@@ -5,6 +5,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/django /app
 COPY utils/build/docker/python/iast.py /app/iast.py
 
diff --git a/utils/build/docker/python/fastapi.Dockerfile b/utils/build/docker/python/fastapi.Dockerfile
index 8c3dc261a69..3588dcb4f98 100644
--- a/utils/build/docker/python/fastapi.Dockerfile
+++ b/utils/build/docker/python/fastapi.Dockerfile
@@ -5,6 +5,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/fastapi/app.sh /app/app.sh
 COPY utils/build/docker/python/fastapi/main.py /app/main.py
 COPY utils/build/docker/python/fastapi/log_conf.yaml /app/log_conf.yaml
diff --git a/utils/build/docker/python/flask-poc.Dockerfile b/utils/build/docker/python/flask-poc.Dockerfile
index acfdba35560..6855d928228 100644
--- a/utils/build/docker/python/flask-poc.Dockerfile
+++ b/utils/build/docker/python/flask-poc.Dockerfile
@@ -5,6 +5,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/flask /app
 COPY utils/build/docker/python/iast.py /app/iast.py
 
diff --git a/utils/build/docker/python/python3.12.Dockerfile b/utils/build/docker/python/python3.12.Dockerfile
index 89572686d96..2383ada3f43 100644
--- a/utils/build/docker/python/python3.12.Dockerfile
+++ b/utils/build/docker/python/python3.12.Dockerfile
@@ -5,6 +5,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/django /app
 COPY utils/build/docker/python/iast.py /app/iast.py
 
diff --git a/utils/build/docker/python/tornado.Dockerfile b/utils/build/docker/python/tornado.Dockerfile
index e2721130254..2e7e1fa98e5 100644
--- a/utils/build/docker/python/tornado.Dockerfile
+++ b/utils/build/docker/python/tornado.Dockerfile
@@ -8,6 +8,9 @@ ENV DD_REMOTECONFIG_POLL_SECONDS=1
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/tornado/app.sh /app/app.sh
 COPY utils/build/docker/python/tornado/main.py /app/main.py
 COPY utils/build/docker/python/iast.py /app/iast.py
diff --git a/utils/build/docker/python/uds-flask.Dockerfile b/utils/build/docker/python/uds-flask.Dockerfile
index e767625b1ec..8d17aa066af 100644
--- a/utils/build/docker/python/uds-flask.Dockerfile
+++ b/utils/build/docker/python/uds-flask.Dockerfile
@@ -5,6 +5,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/flask /app
 COPY utils/build/docker/python/iast.py /app/iast.py
 
diff --git a/utils/build/docker/python/uwsgi-poc.Dockerfile b/utils/build/docker/python/uwsgi-poc.Dockerfile
index 0a2facd4534..6a42951b1ac 100644
--- a/utils/build/docker/python/uwsgi-poc.Dockerfile
+++ b/utils/build/docker/python/uwsgi-poc.Dockerfile
@@ -5,6 +5,9 @@ WORKDIR /app
 COPY utils/build/docker/python/install_ddtrace.sh binaries* /binaries/
 RUN /binaries/install_ddtrace.sh
 
+# Install OTel OTLP exporter for FFE metrics
+RUN pip install opentelemetry-exporter-otlp-proto-http==1.40.0
+
 COPY utils/build/docker/python/flask /app
 COPY utils/build/docker/python/flask/uwsgi_app.sh /app/app.sh
 COPY utils/build/docker/python/iast.py /app/iast.py