diff --git a/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/bootstrap/BraintrustBridge.java b/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/bootstrap/BraintrustBridge.java index a898dcbd..2bb8eaa3 100644 --- a/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/bootstrap/BraintrustBridge.java +++ b/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/bootstrap/BraintrustBridge.java @@ -1,5 +1,6 @@ package dev.braintrust.bootstrap; +import java.net.URL; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -22,10 +23,14 @@ public static BraintrustClassLoader getAgentClassLoader() { return agentClassLoaderRef.get(); } - public static void setAgentClassLoaderIfAbsent(BraintrustClassLoader classLoader) { - var witness = agentClassLoaderRef.compareAndExchange(null, classLoader); + public static BraintrustClassLoader createBraintrustClassLoader( + URL agentJarURL, ClassLoader btClassLoaderParent) throws Exception { + BraintrustClassLoader btClassLoader = + new BraintrustClassLoader(agentJarURL, btClassLoaderParent); + var witness = agentClassLoaderRef.compareAndExchange(null, btClassLoader); if (null != witness) { throw new IllegalStateException("agent classloader must only be set once"); } + return btClassLoader; } } diff --git a/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/system/AgentBootstrap.java b/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/system/AgentBootstrap.java index 2d2a2b58..1803e8e8 100644 --- a/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/system/AgentBootstrap.java +++ b/braintrust-java-agent/bootstrap/src/main/java/dev/braintrust/system/AgentBootstrap.java @@ -1,10 +1,9 @@ package dev.braintrust.system; -import dev.braintrust.bootstrap.BraintrustBridge; -import dev.braintrust.bootstrap.BraintrustClassLoader; import java.io.File; import java.lang.instrument.Instrumentation; import java.net.URL; +import java.net.URLClassLoader; import java.util.jar.JarFile; /** @@ -44,18 +43,16 @@ private static synchronized void install(String agentArgs, Instrumentation inst) if (jvmRunningWithOtelAgent()) { log( - "ERROR: Braintrust agent is not yet compatible with the OTel javaagent -" - + " skipping install."); + "ERROR: Braintrust agent is not yet compatible with the OTel -javaagent." + + " aborting install."); return; } - - if (jvmRunningWithDatadogOtel()) { - log( - "ERROR: Braintrust agent is not yet compatible with datadog javaagent otel -" - + " skipping install."); + if (jvmRunningWithDatadogOtelConfig() && (!isRunningAfterDatadogAgent())) { + log("ERROR: Braintrust agent must run _after_ datadog -javaagent. aborting install."); return; } + boolean installOnBootstrap = !jvmRunningWithDatadogOtelConfig(); try { // Locate the agent JAR from our own code source URL agentJarURL = @@ -67,19 +64,21 @@ private static synchronized void install(String agentArgs, Instrumentation inst) // are set before anything can trigger GlobalOpenTelemetry.get(). enableOtelSDKAutoconfiguration(); - inst.appendToBootstrapClassLoaderSearch(new JarFile(agentJarFile, false)); - log("Added agent JAR to bootstrap classpath."); - - // Create the isolated braintrust classloader. - // Parent is the platform classloader so agent internals can see: - // - Bootstrap classes (OTel API/SDK added via appendToBootstrapClassLoaderSearch) - // - JDK platform modules (java.net.http, java.sql, etc.) - // but NOT application classes (those are on the system/app classloader). - BraintrustClassLoader btClassLoader = - new BraintrustClassLoader(agentJarURL, ClassLoader.getPlatformClassLoader()); - BraintrustBridge.setAgentClassLoaderIfAbsent(btClassLoader); + ClassLoader btClassLoaderParent; + if (installOnBootstrap) { + inst.appendToBootstrapClassLoaderSearch(new JarFile(agentJarFile, false)); + btClassLoaderParent = ClassLoader.getPlatformClassLoader(); + log("Added agent JAR to bootstrap classpath."); + } else { + btClassLoaderParent = + new URLClassLoader( + new URL[] {agentJarFile.toURI().toURL()}, + ClassLoader.getPlatformClassLoader()); + log("skipping bootstrap classpath setup"); + } // Load and invoke the real agent installer through the isolated classloader. + ClassLoader btClassLoader = createBTClassLoader(agentJarURL, btClassLoaderParent); Class installerClass = btClassLoader.loadClass(AGENT_CLASS); installerClass .getMethod(INSTALLER_METHOD, String.class, Instrumentation.class) @@ -92,6 +91,16 @@ private static synchronized void install(String agentArgs, Instrumentation inst) } } + private static ClassLoader createBTClassLoader(URL agentJarURL, ClassLoader btClassLoaderParent) + throws Exception { + // NOTE: not caching because we only invoke this once + var bridgeClass = + btClassLoaderParent.loadClass("dev.braintrust.bootstrap.BraintrustBridge"); + var createMethod = + bridgeClass.getMethod("createBraintrustClassLoader", URL.class, ClassLoader.class); + return (ClassLoader) createMethod.invoke(null, agentJarURL, btClassLoaderParent); + } + /** * Checks whether the OpenTelemetry Java agent is present by looking for its premain class on * the system classloader. Since {@code -javaagent} JARs are always on the system classpath, @@ -109,16 +118,8 @@ private static boolean jvmRunningWithOtelAgent() { } } - /** - * Checks whether the Datadog agent is present and configured for OTel integration. Must be - * callable from the system classloader (no DD compile deps). - */ - private static boolean jvmRunningWithDatadogOtel() { - try { - Class.forName("datadog.trace.bootstrap.Agent", false, null); - } catch (ClassNotFoundException e) { - return false; - } + /** Checks whether the Datadog agent is present and configured for OTel integration */ + private static boolean jvmRunningWithDatadogOtelConfig() { String sysProp = System.getProperty("dd.trace.otel.enabled"); if (sysProp != null) { return Boolean.parseBoolean(sysProp); @@ -131,7 +132,7 @@ private static boolean jvmRunningWithDatadogOtel() { * Returns true if the Datadog agent's premain has already executed, meaning it was listed * before the Braintrust agent in the {@code -javaagent} flags. */ - static boolean isRunningAfterDatadogAgent() { + private static boolean isRunningAfterDatadogAgent() { // DD's premain appends its jars to the bootstrap classpath, making // {@code datadog.trace.bootstrap.Agent} loadable from the bootstrap (null) // classloader. If that class is not found on bootstrap, DD either isn't diff --git a/braintrust-java-agent/internal/build.gradle b/braintrust-java-agent/internal/build.gradle index d176c784..02c7ae9c 100644 --- a/braintrust-java-agent/internal/build.gradle +++ b/braintrust-java-agent/internal/build.gradle @@ -40,6 +40,9 @@ dependencies { // These are the heavy deps that stay in BraintrustClassLoader, NOT on bootstrap. implementation "io.opentelemetry:opentelemetry-exporter-otlp:${otelVersion}" + // for dd compat mode + compileOnly 'com.datadoghq:dd-trace-api:1.60.1' + // Test dependencies testImplementation project(':braintrust-java-agent:bootstrap') testImplementation "org.junit.jupiter:junit-jupiter:${junitVersion}" diff --git a/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/BraintrustAgent.java b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/BraintrustAgent.java index 1000f8cd..f8721023 100644 --- a/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/BraintrustAgent.java +++ b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/BraintrustAgent.java @@ -2,6 +2,7 @@ import com.google.auto.service.AutoService; import dev.braintrust.Braintrust; +import dev.braintrust.agent.dd.BTInterceptor; import dev.braintrust.bootstrap.BraintrustBridge; import dev.braintrust.bootstrap.BraintrustClassLoader; import dev.braintrust.instrumentation.Instrumenter; @@ -21,7 +22,8 @@ public class BraintrustAgent implements AutoConfigurationCustomizerProvider { public static void install(String agentArgs, Instrumentation inst) { if (!(BraintrustAgent.class.getClassLoader() instanceof BraintrustClassLoader)) { throw new IllegalStateException( - "Braintrust agent can only run on a braintrust classloader"); + "Braintrust agent can only run on a braintrust classloader: " + + BraintrustAgent.class.getClassLoader()); } log.info( "invoked on classloader: {}", @@ -31,6 +33,9 @@ public static void install(String agentArgs, Instrumentation inst) { // Fail fast if there are any issues with the Braintrust SDK Braintrust.get(); Instrumenter.install(inst, BraintrustAgent.class.getClassLoader()); + if (jvmRunningWithDatadogOtelConfig() && ddApiOnBootstrapClasspath()) { + BTInterceptor.install(); + } } @Override @@ -51,4 +56,24 @@ public void customize(AutoConfigurationCustomizer autoConfiguration) { return sdkTracerProviderBuilder; })); } + + /** Checks whether the Datadog agent is present and configured for OTel integration */ + private static boolean ddApiOnBootstrapClasspath() { + try { + BraintrustAgent.class.getClassLoader().loadClass("datadog.trace.api.GlobalTracer"); + return true; + } catch (ClassNotFoundException e) { + return false; + } + } + + /** Checks whether the Datadog agent is present and configured for OTel integration */ + private static boolean jvmRunningWithDatadogOtelConfig() { + String sysProp = System.getProperty("dd.trace.otel.enabled"); + if (sysProp != null) { + return Boolean.parseBoolean(sysProp); + } + String envVar = System.getenv("DD_TRACE_OTEL_ENABLED"); + return Boolean.parseBoolean(envVar); + } } diff --git a/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/BTInterceptor.java b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/BTInterceptor.java new file mode 100644 index 00000000..9843bc06 --- /dev/null +++ b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/BTInterceptor.java @@ -0,0 +1,78 @@ +package dev.braintrust.agent.dd; + +import datadog.trace.api.GlobalTracer; +import datadog.trace.api.interceptor.MutableSpan; +import datadog.trace.api.interceptor.TraceInterceptor; +import dev.braintrust.Braintrust; +import dev.braintrust.trace.BraintrustTracing; +import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.sdk.logs.SdkLoggerProvider; +import io.opentelemetry.sdk.metrics.SdkMeterProvider; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.SpanData; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class BTInterceptor implements TraceInterceptor { + private static final AtomicBoolean installed = new AtomicBoolean(false); + + public static void install() { + if (!installed.compareAndExchange(false, true)) { + try { + if (!DDSpanConverter.initialize()) { + log.warn( + "failed to initialize DD span converter. Braintrust traces will not be" + + " reported."); + return; + } + var tbBuilder = + SdkTracerProvider.builder().setIdGenerator(OverridableIdGenerator.INSTANCE); + Braintrust.get() + .openTelemetryEnable( + tbBuilder, SdkLoggerProvider.builder(), SdkMeterProvider.builder()); + final var traceProvider = tbBuilder.build(); + var interceptor = new BTInterceptor(999, traceProvider); + if (!GlobalTracer.get().addTraceInterceptor(interceptor)) { + log.warn( + "trace interceptor install failed due to conflicting priorities." + + " Braintrust traces will not be reported."); + return; + } + log.info("trace interceptor successfully installed"); + } catch (Exception e) { + log.warn( + "trace interceptor install failed. Braintrust traces will not be reported.", + e); + // Don't reset the flag. We don't want to try again. + } + } + } + + private final int priority; + private final Tracer tracer; + + private BTInterceptor(int priority, SdkTracerProvider traceProvider) { + this.priority = priority; + this.tracer = BraintrustTracing.getTracer(traceProvider); + } + + @Override + public int priority() { + return priority; + } + + @Override + public Collection onTraceComplete( + Collection trace) { + try { + List spanDataList = DDSpanConverter.convertTrace(List.copyOf(trace)); + DDSpanConverter.replayTrace(tracer, spanDataList); + } catch (Exception e) { + log.debug("failed to replay traces", e); + } + return trace; + } +} diff --git a/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/DDSpanConverter.java b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/DDSpanConverter.java new file mode 100644 index 00000000..ac2348c2 --- /dev/null +++ b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/DDSpanConverter.java @@ -0,0 +1,375 @@ +package dev.braintrust.agent.dd; + +import datadog.trace.api.DDTraceId; +import datadog.trace.api.interceptor.MutableSpan; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.common.AttributesBuilder; +import io.opentelemetry.api.trace.*; +import io.opentelemetry.context.Context; +import io.opentelemetry.sdk.common.InstrumentationScopeInfo; +import io.opentelemetry.sdk.resources.Resource; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.data.StatusData; +import java.lang.reflect.Method; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; + +/** + * Utility for converting Datadog {@link MutableSpan}s to OTel {@link SpanData} and replaying them + * on an OTel {@link Tracer}. + */ +@Slf4j +public class DDSpanConverter { + private static final AtomicReference contextMethod = new AtomicReference<>(); + private static final AtomicReference getTraceIdMethod = new AtomicReference<>(); + private static final AtomicReference getSpanIdMethod = new AtomicReference<>(); + private static final AtomicBoolean successfulInit = new AtomicBoolean(false); + private static final Map, Optional> getParentIdMethods = + Collections.synchronizedMap(new WeakHashMap<>()); + + static { + initialize(); + } + + /** + * Initialize reflection handles for extracting IDs from DD spans. Must be called before {@link + * #convertTrace} or {@link #replayTrace}. + */ + static synchronized boolean initialize() { + if (successfulInit.get()) { + return true; + } + try { + Class agentSpanClass = + Class.forName( + "datadog.trace.bootstrap.instrumentation.api.AgentSpan", true, null); + contextMethod.set(agentSpanClass.getMethod("context")); + + Class agentSpanContextClass = + Class.forName( + "datadog.trace.bootstrap.instrumentation.api.AgentSpanContext", + true, + null); + getTraceIdMethod.set(agentSpanContextClass.getMethod("getTraceId")); + getSpanIdMethod.set(agentSpanContextClass.getMethod("getSpanId")); + + log.debug("DD span converter reflection initialized successfully."); + successfulInit.set(true); + return true; + } catch (Exception e) { + log.error( + "Failed to initialize DD span converter reflection — span conversion will not" + + " work", + e); + return false; + } + } + + /** Converts a list of DD {@link MutableSpan}s (one trace) to OTel {@link SpanData}. */ + static List convertTrace(List mutableSpans) throws Exception { + if (!successfulInit.get()) { + log.debug("Reflection not initialized — cannot convert DD spans"); + return Collections.emptyList(); + } + + List result = new ArrayList<>(mutableSpans.size()); + for (MutableSpan ddSpan : mutableSpans) { + try { + result.add(convertSpan(ddSpan)); + } catch (Exception e) { + log.warn( + "Failed to convert DD span '{}': {}", + ddSpan.getResourceName(), + e.getMessage()); + } + } + return result; + } + + /** + * Replays a list of already-converted {@link SpanData} on the given OTel {@link Tracer}. + * + *

Spans are topologically sorted (parents before children) so that parent contexts are + * available when starting child spans. Each span is started and immediately ended with the + * original timestamps, attributes, status, and kind preserved. + */ + static void replayTrace(Tracer tracer, List spans) { + if (spans == null || spans.isEmpty()) { + return; + } + + // Sort spans so parents come before children (topological order by parent relationship). + List sorted = topologicalSort(spans); + + // Map from spanId -> OTel Context containing that span, so children can link to parents. + Map spanContextMap = new HashMap<>(); + + for (SpanData sd : sorted) { + SpanBuilder builder = + tracer.spanBuilder(sd.getName()) + .setSpanKind(sd.getKind()) + .setStartTimestamp(sd.getStartEpochNanos(), TimeUnit.NANOSECONDS); + + // Set all attributes + sd.getAttributes().forEach((key, value) -> setAttributeUnchecked(builder, key, value)); + + // Link to parent context if available + String parentSpanId = sd.getParentSpanContext().getSpanId(); + if (sd.getParentSpanContext().isValid()) { + Context parentCtx = spanContextMap.get(parentSpanId); + if (parentCtx != null) { + builder.setParent(parentCtx); + } else { + // Parent not in this batch — create a remote parent context + builder.setParent(Context.current().with(Span.wrap(sd.getParentSpanContext()))); + } + } else { + builder.setNoParent(); + } + + // Set the original IDs so the OverridableIdGenerator returns them + // instead of generating new random ones. + // For root spans: override both traceId and spanId. + // For child spans: only spanId (the SDK inherits traceId from parent context). + if (!sd.getParentSpanContext().isValid()) { + OverridableIdGenerator.setNextIds( + sd.getSpanContext().getTraceId(), sd.getSpanContext().getSpanId()); + } else { + OverridableIdGenerator.setNextIds(null, sd.getSpanContext().getSpanId()); + } + + Span span; + try { + span = builder.startSpan(); + } finally { + OverridableIdGenerator.clear(); + } + + // Set status + if (sd.getStatus().getStatusCode() == StatusCode.ERROR) { + span.setStatus(StatusCode.ERROR, sd.getStatus().getDescription()); + } else if (sd.getStatus().getStatusCode() == StatusCode.OK) { + span.setStatus(StatusCode.OK, sd.getStatus().getDescription()); + } + + // Store the context for potential children + Context ctx = Context.current().with(span); + spanContextMap.put(sd.getSpanContext().getSpanId(), ctx); + + // End with original end timestamp + span.end(sd.getEndEpochNanos(), TimeUnit.NANOSECONDS); + } + } + + @SuppressWarnings("unchecked") + private static void setAttributeUnchecked( + SpanBuilder builder, io.opentelemetry.api.common.AttributeKey key, Object value) { + builder.setAttribute((io.opentelemetry.api.common.AttributeKey) key, (T) value); + } + + /** + * Topologically sorts spans so that parent spans appear before their children. Spans without + * parents in the list come first. + */ + private static List topologicalSort(List spans) { + // Build adjacency: parentSpanId -> list of children + Map> childrenOf = new HashMap<>(); + Set spanIds = new HashSet<>(); + + for (SpanData sd : spans) { + String id = sd.getSpanContext().getSpanId(); + spanIds.add(id); + } + + List roots = new ArrayList<>(); + for (SpanData sd : spans) { + String parentId = sd.getParentSpanContext().getSpanId(); + if (!sd.getParentSpanContext().isValid() || !spanIds.contains(parentId)) { + roots.add(sd); + } else { + childrenOf.computeIfAbsent(parentId, k -> new ArrayList<>()).add(sd); + } + } + + // BFS from roots + List sorted = new ArrayList<>(spans.size()); + Deque queue = new ArrayDeque<>(roots); + while (!queue.isEmpty()) { + SpanData sd = queue.poll(); + sorted.add(sd); + List children = childrenOf.get(sd.getSpanContext().getSpanId()); + if (children != null) { + queue.addAll(children); + } + } + + // If there were cycles or disconnected spans, append any remaining + if (sorted.size() < spans.size()) { + Set added = new HashSet<>(); + for (SpanData sd : sorted) { + added.add(sd.getSpanContext().getSpanId()); + } + for (SpanData sd : spans) { + if (!added.contains(sd.getSpanContext().getSpanId())) { + sorted.add(sd); + } + } + } + + return sorted; + } + + private static SpanData convertSpan(MutableSpan ddSpan) throws Exception { + // Extract trace ID and span ID via reflection + Object agentSpanContext = contextMethod.get().invoke(ddSpan); + DDTraceId ddTraceId = (DDTraceId) getTraceIdMethod.get().invoke(agentSpanContext); + long ddSpanId = (long) getSpanIdMethod.get().invoke(agentSpanContext); + + // Find parent span ID via reflection + long reflectedParentId = 0; + try { + Optional getParentIdMethod = getParentIdMethod(agentSpanContext.getClass()); + if (getParentIdMethod.isPresent()) { + reflectedParentId = (long) getParentIdMethod.get().invoke(agentSpanContext); + } + } catch (Exception e) { + log.debug("Cannot determine parent span ID for '{}'", ddSpan.getResourceName()); + } + + return convertSpan(ddSpan, ddTraceId, ddSpanId, reflectedParentId); + } + + /** + * Converts a DD span to OTel {@link SpanData} using pre-extracted IDs. This overload is + * testable without the DD bootstrap classloader since it does not use reflection. + * + * @param ddSpan the DD span to convert + * @param ddTraceId the DD trace ID (already extracted) + * @param ddSpanId the DD span ID as a long (already extracted) + * @param parentId the parent span ID as a long, or 0 if this is a root span + */ + static SpanData convertSpan( + MutableSpan ddSpan, DDTraceId ddTraceId, long ddSpanId, long parentId) { + // Convert DD trace ID (DDTraceId) to OTel 32-hex-char trace ID + String traceIdHex = ddTraceId.toHexStringPadded(32); + + // Convert DD span ID (long) to OTel 16-hex-char span ID + String spanIdHex = String.format("%016x", ddSpanId); + + // Convert parent span ID + String parentSpanIdHex = SpanContext.getInvalid().getSpanId(); + if (parentId != 0) { + parentSpanIdHex = String.format("%016x", parentId); + } + + SpanContext spanContext = + SpanContext.create( + traceIdHex, spanIdHex, TraceFlags.getSampled(), TraceState.getDefault()); + + SpanContext parentSpanContext; + if (!parentSpanIdHex.equals(SpanContext.getInvalid().getSpanId())) { + parentSpanContext = + SpanContext.create( + traceIdHex, + parentSpanIdHex, + TraceFlags.getSampled(), + TraceState.getDefault()); + } else { + parentSpanContext = SpanContext.getInvalid(); + } + + Attributes attributes = convertTags(ddSpan.getTags()); + SpanKind spanKind = inferSpanKind(ddSpan); + + StatusData status = + ddSpan.isError() ? StatusData.create(StatusCode.ERROR, "") : StatusData.unset(); + + long startEpochNanos = ddSpan.getStartTime(); + long endEpochNanos = startEpochNanos + ddSpan.getDurationNano(); + + return new ImmutableSpanData( + spanContext, + parentSpanContext, + Resource.getDefault(), + InstrumentationScopeInfo.create("braintrust-dd"), + String.valueOf(ddSpan.getResourceName()), + spanKind, + startEpochNanos, + endEpochNanos, + attributes, + status); + } + + private static Optional getParentIdMethod(Class agentSpanContextClass) { + synchronized (getParentIdMethods) { + Optional cached = getParentIdMethods.get(agentSpanContextClass); + if (cached != null) { + return cached; + } + + Optional resolved; + try { + resolved = Optional.of(agentSpanContextClass.getMethod("getParentId")); + } catch (NoSuchMethodException e) { + resolved = Optional.empty(); + } + getParentIdMethods.put(agentSpanContextClass, resolved); + return resolved; + } + } + + /** DD internal span tags that should not be forwarded as OTel attributes. */ + private static final Set DROPPED_DD_TAGS = + Set.of( + "_dd.agent_psr", + "_dd.profiling.enabled", + "_dd.trace_span_attribute_schema", + "_sample_rate"); + + private static Attributes convertTags(Map tags) { + if (tags == null || tags.isEmpty()) { + return Attributes.empty(); + } + AttributesBuilder builder = Attributes.builder(); + for (var entry : tags.entrySet()) { + String key = entry.getKey(); + if (DROPPED_DD_TAGS.contains(key)) { + continue; + } + Object value = entry.getValue(); + if (value instanceof String s) { + builder.put(key, s); + } else if (value instanceof Long l) { + builder.put(key, l); + } else if (value instanceof Integer i) { + builder.put(key, (long) i); + } else if (value instanceof Double d) { + builder.put(key, d); + } else if (value instanceof Float f) { + builder.put(key, (double) f); + } else if (value instanceof Boolean b) { + builder.put(key, b); + } else if (value != null) { + builder.put(key, value.toString()); + } + } + return builder.build(); + } + + /** Infers OTel SpanKind from DD's operation name convention. */ + private static SpanKind inferSpanKind(MutableSpan ddSpan) { + CharSequence opName = ddSpan.getOperationName(); + if (opName == null) return SpanKind.INTERNAL; + String op = opName.toString(); + return switch (op) { + case "server.request", "server" -> SpanKind.SERVER; + case "client.request", "client" -> SpanKind.CLIENT; + case "producer" -> SpanKind.PRODUCER; + case "consumer" -> SpanKind.CONSUMER; + default -> SpanKind.INTERNAL; + }; + } +} diff --git a/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/ImmutableSpanData.java b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/ImmutableSpanData.java new file mode 100644 index 00000000..0b338113 --- /dev/null +++ b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/ImmutableSpanData.java @@ -0,0 +1,116 @@ +package dev.braintrust.agent.dd; + +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.trace.SpanContext; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.sdk.common.InstrumentationLibraryInfo; +import io.opentelemetry.sdk.common.InstrumentationScopeInfo; +import io.opentelemetry.sdk.resources.Resource; +import io.opentelemetry.sdk.trace.data.EventData; +import io.opentelemetry.sdk.trace.data.LinkData; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.data.StatusData; +import java.util.Collections; +import java.util.List; + +/** Immutable implementation of {@link SpanData} for converting DD spans to OTel format. */ +record ImmutableSpanData( + SpanContext spanContext, + SpanContext parentSpanContext, + Resource resource, + InstrumentationScopeInfo instrumentationScopeInfo, + String name, + SpanKind kind, + long startEpochNanos, + long endEpochNanos, + Attributes attributes, + StatusData status) + implements SpanData { + + @Override + public SpanContext getSpanContext() { + return spanContext; + } + + @Override + public SpanContext getParentSpanContext() { + return parentSpanContext; + } + + @Override + public Resource getResource() { + return resource; + } + + @Override + public InstrumentationScopeInfo getInstrumentationScopeInfo() { + return instrumentationScopeInfo; + } + + @Override + public String getName() { + return name; + } + + @Override + public SpanKind getKind() { + return kind; + } + + @Override + public long getStartEpochNanos() { + return startEpochNanos; + } + + @Override + public long getEndEpochNanos() { + return endEpochNanos; + } + + @Override + public Attributes getAttributes() { + return attributes; + } + + @Override + public StatusData getStatus() { + return status; + } + + @Override + public List getEvents() { + return Collections.emptyList(); + } + + @Override + public List getLinks() { + return Collections.emptyList(); + } + + @Override + public boolean hasEnded() { + return true; + } + + @Override + public int getTotalRecordedEvents() { + return 0; + } + + @Override + public int getTotalRecordedLinks() { + return 0; + } + + @Override + public int getTotalAttributeCount() { + return attributes.size(); + } + + @SuppressWarnings("deprecation") + @Override + public InstrumentationLibraryInfo getInstrumentationLibraryInfo() { + return InstrumentationLibraryInfo.create( + instrumentationScopeInfo.getName(), instrumentationScopeInfo.getVersion()); + } +} diff --git a/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/OverridableIdGenerator.java b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/OverridableIdGenerator.java new file mode 100644 index 00000000..b3bc2dd4 --- /dev/null +++ b/braintrust-java-agent/internal/src/main/java/dev/braintrust/agent/dd/OverridableIdGenerator.java @@ -0,0 +1,58 @@ +package dev.braintrust.agent.dd; + +import io.opentelemetry.sdk.trace.IdGenerator; + +/** + * An {@link IdGenerator} that supports thread-local overrides for trace and span IDs. When an + * override is set, the next call to {@link #generateTraceId()} or {@link #generateSpanId()} returns + * the override and clears it. When no override is set, delegates to {@link IdGenerator#random()}. + * + *

This is used by {@link DDSpanConverter#replayTrace} to preserve the original DD span/trace IDs + * when replaying converted spans through the OTel SDK pipeline. + */ +class OverridableIdGenerator implements IdGenerator { + + public static final OverridableIdGenerator INSTANCE = new OverridableIdGenerator(); + + private static final IdGenerator RANDOM = IdGenerator.random(); + + private static final ThreadLocal nextTraceId = new ThreadLocal<>(); + private static final ThreadLocal nextSpanId = new ThreadLocal<>(); + + private OverridableIdGenerator() {} + + /** + * Sets the trace and span IDs to return from the next {@link #generateTraceId()} and {@link + * #generateSpanId()} calls on the current thread. Each value is consumed (cleared) on use. + */ + public static void setNextIds(String traceId, String spanId) { + nextTraceId.set(traceId); + nextSpanId.set(spanId); + } + + /** Clears any pending overrides on the current thread. */ + public static void clear() { + nextTraceId.remove(); + nextSpanId.remove(); + } + + @Override + public String generateTraceId() { + String override = nextTraceId.get(); + if (override != null) { + nextTraceId.remove(); + return override; + } + return RANDOM.generateTraceId(); + } + + @Override + public String generateSpanId() { + String override = nextSpanId.get(); + if (override != null) { + nextSpanId.remove(); + return override; + } + return RANDOM.generateSpanId(); + } +} diff --git a/braintrust-java-agent/internal/src/test/java/dev/braintrust/agent/dd/DDSpanConverterTest.java b/braintrust-java-agent/internal/src/test/java/dev/braintrust/agent/dd/DDSpanConverterTest.java new file mode 100644 index 00000000..ca70f29d --- /dev/null +++ b/braintrust-java-agent/internal/src/test/java/dev/braintrust/agent/dd/DDSpanConverterTest.java @@ -0,0 +1,491 @@ +package dev.braintrust.agent.dd; + +import static org.junit.jupiter.api.Assertions.*; + +import datadog.trace.api.DDTraceId; +import datadog.trace.api.interceptor.MutableSpan; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.StatusCode; +import io.opentelemetry.sdk.testing.exporter.InMemorySpanExporter; +import io.opentelemetry.sdk.trace.SdkTracerProvider; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.opentelemetry.sdk.trace.export.SimpleSpanProcessor; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.junit.jupiter.api.Test; + +class DDSpanConverterTest { + + // ── Trace ID conversion ──────────────────────────────────────────────────── + + @Test + void traceIdConvertedTo32CharHex() { + var ddSpan = stubSpan("test-span", "internal", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.fromHex("0123456789abcdef0123456789abcdef"); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals("0123456789abcdef0123456789abcdef", result.getSpanContext().getTraceId()); + } + + @Test + void traceIdZeroPaddedWhenSmall() { + var ddSpan = stubSpan("test-span", "internal", 1_000_000_000L, 500_000_000L); + // DDTraceId.from(long) creates a 64-bit trace ID — upper 64 bits are zero + DDTraceId traceId = DDTraceId.from(0x1234L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals("00000000000000000000000000001234", result.getSpanContext().getTraceId()); + } + + // ── Span ID conversion ───────────────────────────────────────────────────── + + @Test + void spanIdConvertedTo16CharHex() { + var ddSpan = stubSpan("test-span", "internal", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 0xdeadbeefcafebabeL, 0L); + + assertEquals("deadbeefcafebabe", result.getSpanContext().getSpanId()); + } + + @Test + void spanIdZeroPaddedWhenSmall() { + var ddSpan = stubSpan("test-span", "internal", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 0x00ffL, 0L); + + assertEquals("00000000000000ff", result.getSpanContext().getSpanId()); + } + + // ── Parent-child linking ─────────────────────────────────────────────────── + + @Test + void rootSpanHasInvalidParentContext() { + var ddSpan = stubSpan("root-span", "internal", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.from(42L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertFalse(result.getParentSpanContext().isValid()); + } + + @Test + void childSpanHasValidParentContextWithMatchingTraceId() { + var ddSpan = stubSpan("child-span", "client.request", 2_000_000_000L, 300_000_000L); + DDTraceId traceId = DDTraceId.fromHex("aabbccdd11223344aabbccdd11223344"); + long spanId = 0x1111L; + long parentId = 0x2222L; + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, spanId, parentId); + + assertTrue(result.getParentSpanContext().isValid()); + assertEquals( + "aabbccdd11223344aabbccdd11223344", result.getParentSpanContext().getTraceId()); + assertEquals("0000000000002222", result.getParentSpanContext().getSpanId()); + } + + @Test + void distributedTraceLocalRootPreservesParent() { + // A local root span continuing a distributed trace has parentId != 0 + // (the parent lives in a remote service). The converter must still attach it. + var ddSpan = stubSpan("local-root", "server.request", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.fromHex("aabbccdd11223344aabbccdd11223344"); + long spanId = 0x1111L; + long remoteParentId = 0x9999L; // parent span from upstream service + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, spanId, remoteParentId); + + assertTrue( + result.getParentSpanContext().isValid(), + "local root with remote parent should have valid parent context"); + assertEquals( + "aabbccdd11223344aabbccdd11223344", result.getParentSpanContext().getTraceId()); + assertEquals("0000000000009999", result.getParentSpanContext().getSpanId()); + } + + // ── Timestamps ───────────────────────────────────────────────────────────── + + @Test + void timestampsPreserved() { + long startNanos = 1_700_000_000_000_000_000L; // ~2023-11-14 + long durationNanos = 123_456_789L; + var ddSpan = stubSpan("timed-span", "internal", startNanos, durationNanos); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals(startNanos, result.getStartEpochNanos()); + assertEquals(startNanos + durationNanos, result.getEndEpochNanos()); + } + + @Test + void zeroDurationProducesEqualStartAndEnd() { + long startNanos = 5_000_000_000L; + var ddSpan = stubSpan("instant-span", "internal", startNanos, 0L); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals(startNanos, result.getStartEpochNanos()); + assertEquals(startNanos, result.getEndEpochNanos()); + } + + // ── Tag conversion ───────────────────────────────────────────────────────── + + @Test + void tagsConvertedToAttributes() { + Map tags = new HashMap<>(); + tags.put("string.tag", "hello"); + tags.put("long.tag", 42L); + tags.put("int.tag", 7); + tags.put("double.tag", 3.14); + tags.put("float.tag", 2.5f); + tags.put("bool.tag", true); + var ddSpan = stubSpan("tagged-span", "internal", 1_000_000_000L, 500_000_000L, tags, false); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + var attrs = result.getAttributes(); + assertEquals("hello", attrs.get(AttributeKey.stringKey("string.tag"))); + assertEquals(42L, attrs.get(AttributeKey.longKey("long.tag"))); + assertEquals(7L, attrs.get(AttributeKey.longKey("int.tag"))); // int → long + assertEquals(3.14, attrs.get(AttributeKey.doubleKey("double.tag"))); + assertEquals(2.5, attrs.get(AttributeKey.doubleKey("float.tag")), 0.01); // float → double + assertEquals(true, attrs.get(AttributeKey.booleanKey("bool.tag"))); + } + + // ── Dropped DD tags ──────────────────────────────────────────────────────── + + @Test + void ddInternalTagsDropped() { + Map tags = new HashMap<>(); + tags.put("_dd.agent_psr", 1.0); + tags.put("_dd.profiling.enabled", "false"); + tags.put("_dd.trace_span_attribute_schema", "v1"); + tags.put("_sample_rate", 1.0); + tags.put("keep.this", "yes"); + var ddSpan = + stubSpan("filtered-span", "internal", 1_000_000_000L, 500_000_000L, tags, false); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + var attrs = result.getAttributes(); + assertNull(attrs.get(AttributeKey.doubleKey("_dd.agent_psr"))); + assertNull(attrs.get(AttributeKey.stringKey("_dd.profiling.enabled"))); + assertNull(attrs.get(AttributeKey.stringKey("_dd.trace_span_attribute_schema"))); + assertNull(attrs.get(AttributeKey.doubleKey("_sample_rate"))); + assertEquals("yes", attrs.get(AttributeKey.stringKey("keep.this"))); + } + + // ── SpanKind inference ───────────────────────────────────────────────────── + + @Test + void spanKindInferredFromOperationName() { + record Case(String opName, SpanKind expected) {} + var cases = + List.of( + new Case("internal", SpanKind.INTERNAL), + new Case("server.request", SpanKind.SERVER), + new Case("server", SpanKind.SERVER), + new Case("client.request", SpanKind.CLIENT), + new Case("client", SpanKind.CLIENT), + new Case("producer", SpanKind.PRODUCER), + new Case("consumer", SpanKind.CONSUMER), + new Case("unknown.op", SpanKind.INTERNAL), + new Case("", SpanKind.INTERNAL)); + + DDTraceId traceId = DDTraceId.from(1L); + for (var c : cases) { + var ddSpan = stubSpan("kind-test", c.opName, 1_000_000_000L, 500_000_000L); + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + assertEquals(c.expected, result.getKind(), "opName='%s'".formatted(c.opName)); + } + } + + @Test + void nullOperationNameDefaultsToInternal() { + var ddSpan = stubSpan("null-op", null, 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals(SpanKind.INTERNAL, result.getKind()); + } + + // ── Error status ─────────────────────────────────────────────────────────── + + @Test + void errorSpanHasErrorStatus() { + var ddSpan = + stubSpan("error-span", "internal", 1_000_000_000L, 500_000_000L, Map.of(), true); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals(StatusCode.ERROR, result.getStatus().getStatusCode()); + } + + @Test + void nonErrorSpanHasUnsetStatus() { + var ddSpan = stubSpan("ok-span", "internal", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals(StatusCode.UNSET, result.getStatus().getStatusCode()); + } + + // ── Span name ────────────────────────────────────────────────────────────── + + @Test + void spanNameFromResourceName() { + var ddSpan = stubSpan("my-resource", "internal", 1_000_000_000L, 500_000_000L); + DDTraceId traceId = DDTraceId.from(1L); + + SpanData result = DDSpanConverter.convertSpan(ddSpan, traceId, 1L, 0L); + + assertEquals("my-resource", result.getName()); + } + + // ── replayTrace with topological sort ────────────────────────────────────── + + @Test + void replayTracePreservesParentChildOrder() { + var exporter = InMemorySpanExporter.create(); + var tracerProvider = + SdkTracerProvider.builder() + .setIdGenerator(OverridableIdGenerator.INSTANCE) + .addSpanProcessor(SimpleSpanProcessor.create(exporter)) + .build(); + var tracer = tracerProvider.get("test"); + + // Create SpanData: parent and child (provide child first to test sorting) + var parentCtx = + io.opentelemetry.api.trace.SpanContext.create( + "0123456789abcdef0123456789abcdef", + "aaaaaaaaaaaaaaaa", + io.opentelemetry.api.trace.TraceFlags.getSampled(), + io.opentelemetry.api.trace.TraceState.getDefault()); + var childCtx = + io.opentelemetry.api.trace.SpanContext.create( + "0123456789abcdef0123456789abcdef", + "bbbbbbbbbbbbbbbb", + io.opentelemetry.api.trace.TraceFlags.getSampled(), + io.opentelemetry.api.trace.TraceState.getDefault()); + + long now = System.nanoTime(); + var parentSpan = + new ImmutableSpanData( + parentCtx, + io.opentelemetry.api.trace.SpanContext.getInvalid(), + io.opentelemetry.sdk.resources.Resource.getDefault(), + io.opentelemetry.sdk.common.InstrumentationScopeInfo.create("test"), + "parent-span", + SpanKind.INTERNAL, + now, + now + 1_000_000, + io.opentelemetry.api.common.Attributes.empty(), + io.opentelemetry.sdk.trace.data.StatusData.unset()); + var childSpan = + new ImmutableSpanData( + childCtx, + parentCtx, + io.opentelemetry.sdk.resources.Resource.getDefault(), + io.opentelemetry.sdk.common.InstrumentationScopeInfo.create("test"), + "child-span", + SpanKind.CLIENT, + now + 100_000, + now + 900_000, + io.opentelemetry.api.common.Attributes.empty(), + io.opentelemetry.sdk.trace.data.StatusData.unset()); + + // Provide child first to verify topological sort reorders them + DDSpanConverter.replayTrace(tracer, List.of(childSpan, parentSpan)); + + var exported = exporter.getFinishedSpanItems(); + assertEquals(2, exported.size()); + var names = exported.stream().map(SpanData::getName).toList(); + assertTrue(names.contains("parent-span"), "missing parent-span in replay"); + assertTrue(names.contains("child-span"), "missing child-span in replay"); + + // The replayed spans must preserve the original trace and span IDs. + var replayedParent = + exported.stream().filter(s -> s.getName().equals("parent-span")).findFirst().get(); + var replayedChild = + exported.stream().filter(s -> s.getName().equals("child-span")).findFirst().get(); + + assertEquals( + "0123456789abcdef0123456789abcdef", + replayedParent.getSpanContext().getTraceId(), + "parent traceId must be preserved"); + assertEquals( + "aaaaaaaaaaaaaaaa", + replayedParent.getSpanContext().getSpanId(), + "parent spanId must be preserved"); + + assertEquals( + "0123456789abcdef0123456789abcdef", + replayedChild.getSpanContext().getTraceId(), + "child traceId must be preserved"); + assertEquals( + "bbbbbbbbbbbbbbbb", + replayedChild.getSpanContext().getSpanId(), + "child spanId must be preserved"); + + // The child's parent must point to the original parent span ID. + assertTrue(replayedChild.getParentSpanContext().isValid()); + assertEquals( + "aaaaaaaaaaaaaaaa", + replayedChild.getParentSpanContext().getSpanId(), + "child parentSpanId must reference original parent"); + + tracerProvider.close(); + } + + // ── Stub helper ──────────────────────────────────────────────────────────── + + private static MutableSpan stubSpan( + String resourceName, String operationName, long startTimeNanos, long durationNanos) { + return stubSpan( + resourceName, operationName, startTimeNanos, durationNanos, Map.of(), false); + } + + private static MutableSpan stubSpan( + String resourceName, + String operationName, + long startTimeNanos, + long durationNanos, + Map tags, + boolean isError) { + return new MutableSpan() { + @Override + public long getStartTime() { + return startTimeNanos; + } + + @Override + public long getDurationNano() { + return durationNanos; + } + + @Override + public CharSequence getOperationName() { + return operationName; + } + + @Override + public CharSequence getResourceName() { + return resourceName; + } + + @Override + public Map getTags() { + return tags; + } + + @Override + public boolean isError() { + return isError; + } + + // ── Methods not used by convertSpan(MutableSpan, DDTraceId, long, long) ── + @Override + public MutableSpan setOperationName(CharSequence s) { + throw new UnsupportedOperationException(); + } + + @Override + public String getServiceName() { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setServiceName(String s) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setResourceName(CharSequence s) { + throw new UnsupportedOperationException(); + } + + @Override + public Integer getSamplingPriority() { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setSamplingPriority(int i) { + throw new UnsupportedOperationException(); + } + + @Override + public String getSpanType() { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setSpanType(CharSequence s) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setTag(String k, String v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setTag(String k, boolean v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setTag(String k, Number v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setMetric(CharSequence k, int v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setMetric(CharSequence k, long v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setMetric(CharSequence k, float v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setMetric(CharSequence k, double v) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan setError(boolean b) { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan getRootSpan() { + throw new UnsupportedOperationException(); + } + + @Override + public MutableSpan getLocalRootSpan() { + throw new UnsupportedOperationException(); + } + }; + } +} diff --git a/braintrust-java-agent/smoke-test/dd-agent/build.gradle b/braintrust-java-agent/smoke-test/dd-agent/build.gradle new file mode 100644 index 00000000..8d606875 --- /dev/null +++ b/braintrust-java-agent/smoke-test/dd-agent/build.gradle @@ -0,0 +1,189 @@ +plugins { + id 'java' +} + +dependencies { + implementation "io.opentelemetry:opentelemetry-api:${otelVersion}" + implementation "io.opentelemetry:opentelemetry-sdk:${otelVersion}" + // dd-trace-api is provided at runtime by the DD agent on the bootstrap classpath, + // but we need it at compile time for TraceInterceptor / MutableSpan. + implementation 'com.datadoghq:dd-trace-api:1.60.1' + implementation 'org.msgpack:msgpack-core:0.9.8' + implementation 'io.opentelemetry.proto:opentelemetry-proto:1.5.0-alpha' + // Bootstrap classes (DDBridge) — needed at compile time for smoke test assertions. + // Must be compileOnly so the runtime version comes from the actual agent JAR, + // not from this dependency (which would shadow the agent's AgentBootstrap). + compileOnly project(':braintrust-java-agent:bootstrap') + runtimeOnly "org.slf4j:slf4j-simple:${slf4jVersion}" +} + +// ── Configuration to resolve the DD agent JAR from Maven Central ── +configurations { + ddAgent +} +dependencies { + ddAgent 'com.datadoghq:dd-java-agent:1.60.1' +} + +def braintrustAgentJar = project(':braintrust-java-agent').tasks.named('jar') + +/** + * Smoke test: runs DdAgentSmokeTest with both agents attached. + * + * This is a JavaExec task (not JUnit) because it needs a clean JVM with two + * -javaagent flags in a specific order. It follows the same pattern as + * testGlobalLogger in :braintrust-java-agent. + * + * Agent ordering: DD first, Braintrust second. + * The process exit code signals pass (0) or fail (non-zero). + */ +/** + * Smoke test: DD agent first, Braintrust agent second. + */ +task smokeTestDDAgentFirst(type: JavaExec) { + dependsOn braintrustAgentJar, classes + + mainClass = 'dev.braintrust.smoketest.ddagent.DdAgentSmokeTest' + classpath = sourceSets.main.runtimeClasspath + + def btAgent = braintrustAgentJar.map { it.archiveFile.get().asFile.absolutePath } + def ddAgentJar = configurations.ddAgent.singleFile + + jvmArgs = [ + "-javaagent:${ddAgentJar.absolutePath}", + "-Ddd.trace.otel.enabled=true", + "-Ddd.trace.enabled=true", + "-Ddd.trace.agent.url=http://127.0.0.1:18226", + "-Ddd.trace.flush.interval=1", + "-Dotel.java.global-autoconfigure.enabled=true", + "-Dotel.traces.exporter=none", + "-Dotel.metrics.exporter=none", + "-Dotel.logs.exporter=none", + "-Ddd.service=bt-dd-smoke-test", + "-Dbraintrust.smoketest.dd.mock.port=18226", + "-Dbraintrust.smoketest.bt.mock.port=18227", + // "-Ddd.trace.debug=true", + "-javaagent:${btAgent.get()}", + ] + + // Make sure OTEL/DD environment doesn't leak from the host + environment.remove('DD_API_KEY') + environment.remove('DD_AGENT_HOST') + environment.remove('OTEL_EXPORTER_OTLP_ENDPOINT') + environment.remove('OTEL_TRACES_EXPORTER') + + environment 'BT_AGENT_EXPECTED', 'true' + environment 'BRAINTRUST_FILTER_AI_SPANS', 'true' + environment 'BRAINTRUST_API_URL', 'http://127.0.0.1:18227' + environment 'BRAINTRUST_API_KEY', 'test-api-key' + environment 'OTEL_TRACES_EXPORTER', 'none' + environment 'OTEL_METRICS_EXPORTER', 'none' + environment 'OTEL_LOGS_EXPORTER', 'none' + + // ── Caching: declare inputs so Gradle can skip/cache this task ── + inputs.files(braintrustAgentJar) + inputs.files(configurations.ddAgent) + inputs.files(sourceSets.main.output) + def markerFile = layout.buildDirectory.file('test-results/dd-agent-first-smoke-test.passed') + outputs.file(markerFile) + + doLast { + markerFile.get().asFile.tap { + parentFile.mkdirs() + text = "passed at ${java.time.Instant.now()}" + } + } +} + +/** + * Smoke test: DD agent only (no Braintrust agent). + * Verifies that DD-only assertions pass and OTel assertions are skipped. + */ +task smokeTestDdOnly(type: JavaExec) { + dependsOn classes + + mainClass = 'dev.braintrust.smoketest.ddagent.DdAgentSmokeTest' + classpath = sourceSets.main.runtimeClasspath + + def ddAgentJar = configurations.ddAgent.singleFile + + jvmArgs = [ + "-javaagent:${ddAgentJar.absolutePath}", + "-Ddd.trace.otel.enabled=true", + "-Ddd.trace.enabled=true", + "-Ddd.trace.agent.url=http://127.0.0.1:18228", + "-Ddd.trace.flush.interval=1", + "-Ddd.service=bt-dd-smoke-test", + "-Dbraintrust.smoketest.dd.mock.port=18228", + ] + + environment.remove('DD_API_KEY') + environment.remove('DD_AGENT_HOST') + environment.remove('OTEL_EXPORTER_OTLP_ENDPOINT') + environment.remove('OTEL_TRACES_EXPORTER') + + environment 'BT_AGENT_EXPECTED', 'false' + + inputs.files(configurations.ddAgent) + inputs.files(sourceSets.main.output) + def markerFile = layout.buildDirectory.file('test-results/dd-only-smoke-test.passed') + outputs.file(markerFile) + + doLast { + markerFile.get().asFile.tap { + parentFile.mkdirs() + text = "passed at ${java.time.Instant.now()}" + } + } +} + +/** + * Smoke test: DD agent without OTel compatibility mode. + * Verifies that DD-native tracing (@Trace annotations) still works + * when dd.trace.otel.enabled is NOT set. + */ +task smokeTestDdNoOtel(type: JavaExec) { + dependsOn braintrustAgentJar, classes + + mainClass = 'dev.braintrust.smoketest.ddagent.DdNoOtelSmokeTest' + classpath = sourceSets.main.runtimeClasspath + + def btAgent = braintrustAgentJar.map { it.archiveFile.get().asFile.absolutePath } + def ddAgentJar = configurations.ddAgent.singleFile + + jvmArgs = [ + "-javaagent:${ddAgentJar.absolutePath}", + "-Ddd.trace.enabled=true", + "-Ddd.trace.agent.url=http://127.0.0.1:18230", + "-Ddd.trace.flush.interval=1", + "-Ddd.service=bt-dd-smoke-test", + "-Dbraintrust.smoketest.dd.mock.port=18230", + "-Dbraintrust.smoketest.bt.mock.port=18231", + // dd.trace.otel.enabled is intentionally NOT set + "-javaagent:${btAgent.get()}", + ] + + environment.remove('DD_API_KEY') + environment.remove('DD_AGENT_HOST') + environment.remove('OTEL_EXPORTER_OTLP_ENDPOINT') + environment.remove('OTEL_TRACES_EXPORTER') + + environment 'BT_AGENT_EXPECTED', 'true' + environment 'BRAINTRUST_API_URL', 'http://127.0.0.1:18231' + environment 'BRAINTRUST_API_KEY', 'test-api-key' + + inputs.files(braintrustAgentJar) + inputs.files(configurations.ddAgent) + inputs.files(sourceSets.main.output) + def markerFile = layout.buildDirectory.file('test-results/dd-no-otel-smoke-test.passed') + outputs.file(markerFile) + + doLast { + markerFile.get().asFile.tap { + parentFile.mkdirs() + text = "passed at ${java.time.Instant.now()}" + } + } +} + +test.dependsOn smokeTestDdOnly, smokeTestDDAgentFirst, smokeTestDdNoOtel diff --git a/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/DdAgentSmokeTest.java b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/DdAgentSmokeTest.java new file mode 100644 index 00000000..f81c674f --- /dev/null +++ b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/DdAgentSmokeTest.java @@ -0,0 +1,621 @@ +package dev.braintrust.smoketest.ddagent; + +import dev.braintrust.smoketest.ddagent.MockBraintrustBackend.OtlpSpan; +import dev.braintrust.smoketest.ddagent.MockDdAgentServer.DdSpan; +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.logs.Logger; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.api.trace.*; +import io.opentelemetry.context.Context; +import io.opentelemetry.context.propagation.TextMapGetter; +import io.opentelemetry.context.propagation.TextMapPropagator; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + * Smoke test that runs with both the Braintrust agent and the Datadog agent attached. A mock DD + * Agent HTTP server captures trace payloads for assertion instead of using in-process interceptors. + */ +public class DdAgentSmokeTest { + + /** + * Port the mock DD agent server listens on (must match -Ddd.trace.agent.url in build.gradle). + */ + static final int MOCK_DD_AGENT_PORT = + Integer.getInteger("braintrust.smoketest.dd.mock.port", 18126); + + /** + * Port the mock Braintrust backend listens on (must match BRAINTRUST_API_URL in build.gradle). + */ + static final int MOCK_BT_BACKEND_PORT = + Integer.getInteger("braintrust.smoketest.bt.mock.port", 18127); + + /** True if the Braintrust agent is attached (its bootstrap class is on the classpath). */ + static final boolean BT_AGENT_ENABLED = isBraintrustAgentPresent(); + + /** + * If set, asserts that the BT agent detection result matches the expected value. This catches + * bugs in the detection logic itself — e.g., if the bootstrap class is renamed or the + * classloader probe breaks. + */ + static final Boolean BT_AGENT_EXPECTED = parseBooleanEnv("BT_AGENT_EXPECTED"); + + public static void main(String[] args) throws Exception { + assertBtAgentDetection(); + + System.out.println( + "[smoke-test] Braintrust agent: " + + (BT_AGENT_ENABLED + ? "ENABLED" + : "NOT DETECTED — skipping braintrust assertions")); + + // Start mock DD agent server to capture DD trace payloads. + var mockDdServer = new MockDdAgentServer(1, MOCK_DD_AGENT_PORT); + mockDdServer.start(); + + // Start mock Braintrust backend to capture OTLP trace exports. + // When the BT agent is present, we expect at least 1 trace export request. + var mockBtBackend = + new MockBraintrustBackend(BT_AGENT_ENABLED ? 1 : 0, MOCK_BT_BACKEND_PORT); + mockBtBackend.start(); + + try { + OpenTelemetry otel = GlobalOpenTelemetry.get(); + assertDdShimInstalled(otel); + + assertOtelTraces(otel, mockDdServer, mockBtBackend); + assertDistributedTrace(otel, mockDdServer); + assertOtelLogs(otel); + assertOtelMetrics(otel); + + System.out.println("=== Smoke test passed ==="); + } finally { + mockDdServer.stop(); + mockBtBackend.stop(); + } + } + + private static void assertOtelTraces( + OpenTelemetry otel, MockDdAgentServer mockServer, MockBraintrustBackend mockBtBackend) + throws Exception { + Tracer instTracer = otel.getTracer("braintrust-dd-smoke-test"); + Span root = + instTracer + .spanBuilder("smoke-test-span") + .setSpanKind(SpanKind.INTERNAL) + .setAttribute("braintrust.whatever", 1) + .setAttribute("test.source", "braintrust-dd-smoke-test") + .startSpan(); + try (var ignored = root.makeCurrent()) { + Span child = + instTracer + .spanBuilder("smoke-test-child") + .setSpanKind(SpanKind.CLIENT) + .setAttribute("braintrust.whatever", 2) + .setAttribute("test.child", true) + .startSpan(); + try (var ignored2 = child.makeCurrent()) { + assertContextPropagation( + otel, + child.getSpanContext().getTraceId(), + child.getSpanContext().getSpanId()); + } finally { + child.end(); + } + } finally { + root.end(); + } + + // Wait for DD to flush traces to our mock server. + boolean received = mockServer.awaitTraces(30, TimeUnit.SECONDS); + assertTrue(received, "timed out waiting for DD trace payload on mock server"); + + var allTraces = mockServer.getReceivedTraces(); + assertTrue(!allTraces.isEmpty(), "no traces received by mock DD agent server"); + + // DD may batch multiple traces into one payload or send multiple payloads. + // Flatten and find our smoke-test spans. + var allSpans = mockServer.getAllSpans(); + System.out.println( + "[smoke-test] Mock DD agent received %d trace(s), %d total span(s)" + .formatted(allTraces.size(), allSpans.size())); + + DdSpan ddRoot = findSpanByResource(allSpans, "smoke-test-span"); + DdSpan ddChild = findSpanByResource(allSpans, "smoke-test-child"); + assertNotNull(ddRoot, "missing DD root span (resource='smoke-test-span')"); + assertNotNull(ddChild, "missing DD child span (resource='smoke-test-child')"); + + assertDdSpanMetadata(ddRoot, ddChild); + + if (BT_AGENT_ENABLED) { + assertBraintrustSpans(mockBtBackend); + } else { + System.out.println( + "[smoke-test] Braintrust backend received %d trace request(s), %d log request(s) (no assertions)" + .formatted( + mockBtBackend.traceRequestCount(), + mockBtBackend.logRequestCount())); + } + } + + /** + * Simulates a distributed trace: creates a span under a fake remote parent context and verifies + * the DD span preserves the parent ID linkage. + */ + private static void assertDistributedTrace(OpenTelemetry otel, MockDdAgentServer mockServer) + throws Exception { + Tracer tracer = otel.getTracer("braintrust-dd-smoke-test"); + + // Simulate an incoming remote parent (e.g., from a traceparent header). + SpanContext remoteParent = + SpanContext.createFromRemoteParent( + "aabbccddaabbccddaabbccddaabbccdd", + "1234567812345678", + TraceFlags.getSampled(), + TraceState.getDefault()); + + Context parentContext = Context.root().with(Span.wrap(remoteParent)); + + Span localRoot = + tracer.spanBuilder("distributed-trace-entry") + .setParent(parentContext) + .setSpanKind(SpanKind.SERVER) + .setAttribute("braintrust.whatever", 1) + .setAttribute("test.distributed", true) + .startSpan(); + try (var ignored = localRoot.makeCurrent()) { + Span child = + tracer.spanBuilder("distributed-trace-child") + .setSpanKind(SpanKind.INTERNAL) + .setAttribute("braintrust.whatever", 2) + .startSpan(); + child.end(); + } finally { + localRoot.end(); + } + + // Wait for the distributed trace spans to arrive at the DD mock server. + // DD may batch them with the earlier trace or send a separate flush. + DdSpan ddEntry = pollForDdSpan(mockServer, "distributed-trace-entry", 30); + DdSpan ddChild = pollForDdSpan(mockServer, "distributed-trace-child", 30); + + var errors = new ArrayList(); + + // The local root should have a non-zero parent ID pointing to the remote parent. + // The remote parent span ID "1234567812345678" in hex = 0x1234567812345678 in decimal. + long expectedParentId = Long.parseUnsignedLong("1234567812345678", 16); + if (ddEntry.parentId != expectedParentId) { + errors.add( + "distributed trace entry parentId: expected=%d (remote parent) actual=%d" + .formatted(expectedParentId, ddEntry.parentId)); + } + + // The child should be parented to the local root. + if (ddChild.parentId != ddEntry.spanId) { + errors.add( + "distributed trace child parentId: expected=%d (entry spanId) actual=%d" + .formatted(ddEntry.spanId, ddChild.parentId)); + } + + // Both should share the same trace ID. + if (ddEntry.traceId != ddChild.traceId) { + errors.add( + "distributed trace traceId mismatch: entry=%d child=%d" + .formatted(ddEntry.traceId, ddChild.traceId)); + } + + if (!errors.isEmpty()) { + throw new RuntimeException( + "Distributed trace assertions:\n " + String.join("\n ", errors)); + } + System.out.println( + "[smoke-test] Distributed trace (remote parent linkage, parent-child, traceId):" + + " OK"); + } + + private static void assertOtelLogs(OpenTelemetry otel) { + // DD does not have an interceptor for this at the moment, so we just exercise the API. + Logger logger = otel.getLogsBridge().get("braintrust-dd-smoke-test"); + logger.logRecordBuilder() + .setBody("smoke-test log record") + .setAttribute(AttributeKey.stringKey("test.source"), "smoke-test") + .emit(); + } + + private static void assertOtelMetrics(OpenTelemetry otel) { + // DD does not have an interceptor for this at the moment, so we just exercise the API. + Meter meter = otel.getMeter("braintrust-dd-smoke-test"); + LongCounter counter = + meter.counterBuilder("smoke_test.dd_counter") + .setDescription("DD smoke test counter") + .build(); + counter.add(42); + } + + /** + * Verifies DD's OTel shim is installed as the GlobalOpenTelemetry implementation. The + * TracerProvider and ContextPropagators should be DD shim classes. + */ + private static void assertDdShimInstalled(OpenTelemetry otel) { + var errors = new ArrayList(); + + String tpClass = otel.getTracerProvider().getClass().getName(); + if (!tpClass.contains("datadog")) { + errors.add("TracerProvider is not DD shim: %s".formatted(tpClass)); + } + + String mpClass = otel.getMeterProvider().getClass().getName(); + + String propClass = otel.getPropagators().getClass().getName(); + if (!propClass.contains("datadog")) { + errors.add("ContextPropagators is not DD shim: %s".formatted(propClass)); + } + + if (!errors.isEmpty()) { + throw new RuntimeException("DD shim installation:\n " + String.join("\n ", errors)); + } + System.out.println( + "[smoke-test] DD shim installed: OK (TracerProvider=%s, MeterProvider=%s, Propagators=%s)" + .formatted(tpClass, mpClass, propClass)); + } + + private static final TextMapGetter> MAP_GETTER = + new TextMapGetter<>() { + @Override + public Iterable keys(Map carrier) { + return carrier.keySet(); + } + + @Override + public String get(Map carrier, String key) { + return carrier.get(key); + } + }; + + private static void assertContextPropagation( + OpenTelemetry otel, String traceId, String spanId) { + var errors = new ArrayList(); + TextMapPropagator propagator = otel.getPropagators().getTextMapPropagator(); + + // Inject + Map carrier = new HashMap<>(); + propagator.inject(Context.current(), carrier, Map::put); + + if (carrier.isEmpty()) { + throw new RuntimeException("context propagation: inject produced no headers"); + } + + // Verify traceparent header (W3C format: 00---) + String traceparent = carrier.get("traceparent"); + if (traceparent == null) { + errors.add( + "traceparent header missing from injected headers (got: %s)" + .formatted(carrier.keySet())); + } else { + String[] parts = traceparent.split("-"); + if (parts.length != 4) { + errors.add("traceparent malformed: '%s'".formatted(traceparent)); + } else { + if (!parts[1].equals(traceId)) { + errors.add( + "traceparent traceId: expected=%s actual=%s" + .formatted(traceId, parts[1])); + } + if (!parts[2].equals(spanId)) { + errors.add( + "traceparent spanId: expected=%s actual=%s" + .formatted(spanId, parts[2])); + } + } + } + + assertDdPropagationHeaders(carrier, traceId, spanId, errors); + + Context extracted = propagator.extract(Context.root(), carrier, MAP_GETTER); + Span extractedSpan = Span.fromContext(extracted); + SpanContext extractedCtx = extractedSpan.getSpanContext(); + + if (!extractedCtx.isValid()) { + errors.add("extracted context is invalid"); + } else { + if (!extractedCtx.getTraceId().equals(traceId)) { + errors.add( + "extracted traceId: expected=%s actual=%s" + .formatted(traceId, extractedCtx.getTraceId())); + } + if (!extractedCtx.getSpanId().equals(spanId)) { + errors.add( + "extracted spanId: expected=%s actual=%s" + .formatted(spanId, extractedCtx.getSpanId())); + } + } + + if (!errors.isEmpty()) { + throw new RuntimeException("context propagation:\n " + String.join("\n ", errors)); + } + System.out.println( + "[smoke-test] Context propagation (inject/extract): OK (headers: %s)" + .formatted(carrier.keySet())); + } + + /** Asserts DD-specific propagation headers are present and contain correct values. */ + private static void assertDdPropagationHeaders( + Map carrier, String traceId, String spanId, List errors) { + String ddTraceId = carrier.get("x-datadog-trace-id"); + if (ddTraceId == null) { + errors.add("x-datadog-trace-id header missing"); + } else { + long expectedDdTraceId = Long.parseUnsignedLong(traceId.substring(16), 16); + if (!ddTraceId.equals(Long.toUnsignedString(expectedDdTraceId))) { + errors.add( + "x-datadog-trace-id: expected=%s actual=%s (otel traceId=%s)" + .formatted( + Long.toUnsignedString(expectedDdTraceId), + ddTraceId, + traceId)); + } + } + + String ddParentId = carrier.get("x-datadog-parent-id"); + if (ddParentId == null) { + errors.add("x-datadog-parent-id header missing"); + } else { + long expectedDdSpanId = Long.parseUnsignedLong(spanId, 16); + if (!ddParentId.equals(Long.toUnsignedString(expectedDdSpanId))) { + errors.add( + "x-datadog-parent-id: expected=%s actual=%s (otel spanId=%s)" + .formatted( + Long.toUnsignedString(expectedDdSpanId), + ddParentId, + spanId)); + } + } + + if (!carrier.containsKey("x-datadog-sampling-priority")) { + errors.add("x-datadog-sampling-priority header missing"); + } + } + + /** + * Asserts DD-specific metadata on the captured spans from the mock server: service name, + * operation name, and parent-child structure. + */ + private static void assertDdSpanMetadata(DdSpan ddRoot, DdSpan ddChild) { + var errors = new ArrayList(); + + // Service name + if (!"bt-dd-smoke-test".equals(ddRoot.service)) { + errors.add( + "root service: expected='bt-dd-smoke-test' actual='%s'" + .formatted(ddRoot.service)); + } + if (!"bt-dd-smoke-test".equals(ddChild.service)) { + errors.add( + "child service: expected='bt-dd-smoke-test' actual='%s'" + .formatted(ddChild.service)); + } + + // Operation name + if (!"internal".equals(ddRoot.name)) { + errors.add("root operation: expected='internal' actual='%s'".formatted(ddRoot.name)); + } + if (!"client.request".equals(ddChild.name)) { + errors.add( + "child operation: expected='client.request' actual='%s'" + .formatted(ddChild.name)); + } + + // Parent-child linking: child's parentId should be root's spanId + if (ddChild.parentId != ddRoot.spanId) { + errors.add( + "child parentId: expected=%d (root spanId) actual=%d" + .formatted(ddRoot.spanId, ddChild.parentId)); + } + + // Same trace ID + if (ddChild.traceId != ddRoot.traceId) { + errors.add( + "traceId mismatch: root=%d child=%d" + .formatted(ddRoot.traceId, ddChild.traceId)); + } + + // Duration should be positive + if (ddRoot.duration <= 0) { + errors.add("root duration <= 0: %d".formatted(ddRoot.duration)); + } + if (ddChild.duration <= 0) { + errors.add("child duration <= 0: %d".formatted(ddChild.duration)); + } + + if (!errors.isEmpty()) { + throw new RuntimeException("DD span metadata:\n " + String.join("\n ", errors)); + } + System.out.println( + "[smoke-test] DD span metadata (service, operation, parent-child, timing): OK"); + } + + // ── Braintrust backend assertions ───────────────────────────────────────── + + /** DD internal tags that should NOT appear on bridged Braintrust spans. */ + private static final Set DROPPED_DD_TAGS = + Set.of( + "_dd.agent_psr", + "_dd.profiling.enabled", + "_dd.trace_span_attribute_schema", + "_sample_rate"); + + /** + * Asserts that the Braintrust mock backend received the expected spans with correct structure + * and attributes. + */ + private static void assertBraintrustSpans(MockBraintrustBackend mockBtBackend) + throws Exception { + boolean received = mockBtBackend.awaitTraces(30, TimeUnit.SECONDS); + assertTrue(received, "timed out waiting for Braintrust trace export"); + + var allSpans = mockBtBackend.getAllSpans(); + System.out.println( + "[smoke-test] Braintrust backend received %d trace request(s), %d span(s)" + .formatted(mockBtBackend.traceRequestCount(), allSpans.size())); + + assertTrue( + allSpans.size() >= 2, + "expected at least 2 Braintrust spans, got " + allSpans.size()); + + OtlpSpan btRoot = mockBtBackend.findSpanByName("smoke-test-span"); + OtlpSpan btChild = mockBtBackend.findSpanByName("smoke-test-child"); + assertNotNull(btRoot, "missing Braintrust root span (name='smoke-test-span')"); + assertNotNull(btChild, "missing Braintrust child span (name='smoke-test-child')"); + + var errors = new ArrayList(); + + // ── Root span assertions ── + if (btRoot.kind != io.opentelemetry.proto.trace.v1.Span.SpanKind.SPAN_KIND_INTERNAL) { + errors.add("root kind: expected=INTERNAL actual=%s".formatted(btRoot.kind)); + } + if (!"braintrust-dd-smoke-test".equals(btRoot.stringAttr("test.source"))) { + errors.add( + "root test.source: expected='braintrust-dd-smoke-test' actual='%s'" + .formatted(btRoot.stringAttr("test.source"))); + } + + // ── Child span assertions ── + if (btChild.kind != io.opentelemetry.proto.trace.v1.Span.SpanKind.SPAN_KIND_CLIENT) { + errors.add("child kind: expected=CLIENT actual=%s".formatted(btChild.kind)); + } + if (!Boolean.TRUE.equals(btChild.boolAttr("test.child"))) { + errors.add( + "child test.child: expected=true actual=%s" + .formatted(btChild.boolAttr("test.child"))); + } + Long whateverVal = btChild.longAttr("braintrust.whatever"); + if (whateverVal == null || whateverVal != 2L) { + errors.add("child braintrust.whatever: expected=2 actual=%s".formatted(whateverVal)); + } + + // ── Parent-child relationship ── + if (!btChild.traceId.equals(btRoot.traceId)) { + errors.add( + "traceId mismatch: root=%s child=%s" + .formatted(btRoot.traceId, btChild.traceId)); + } + if (!btChild.parentSpanId.equals(btRoot.spanId)) { + errors.add( + "child parentSpanId: expected=%s (root spanId) actual=%s" + .formatted(btRoot.spanId, btChild.parentSpanId)); + } + if (btRoot.traceId.replace("0", "").isEmpty()) { + errors.add("root has zero/empty traceId: " + btRoot.traceId); + } + + // ── Timing ── + if (btRoot.startTimeUnixNano <= 0) { + errors.add("root startTime <= 0"); + } + if (btRoot.endTimeUnixNano <= btRoot.startTimeUnixNano) { + errors.add("root endTime <= startTime"); + } + if (btChild.startTimeUnixNano <= 0) { + errors.add("child startTime <= 0"); + } + if (btChild.endTimeUnixNano <= btChild.startTimeUnixNano) { + errors.add("child endTime <= startTime"); + } + + // ── Dropped DD tags should not be present ── + for (OtlpSpan span : List.of(btRoot, btChild)) { + for (String droppedTag : DROPPED_DD_TAGS) { + if (span.attributes.containsKey(droppedTag)) { + errors.add( + "span '%s' has dropped DD tag '%s'".formatted(span.name, droppedTag)); + } + } + } + + if (!errors.isEmpty()) { + throw new RuntimeException( + "Braintrust span assertions:\n " + String.join("\n ", errors)); + } + System.out.println( + "[smoke-test] Braintrust spans (structure, attributes, timing, no DD internal" + + " tags): OK"); + } + + // ── Helpers ───────────────────────────────────────────────────────────────── + + private static DdSpan findSpanByResource(List spans, String resource) { + for (var span : spans) { + if (resource.equals(span.resource)) { + return span; + } + } + return null; + } + + private static DdSpan pollForDdSpan( + MockDdAgentServer server, String resource, int timeoutSeconds) throws Exception { + long deadline = System.currentTimeMillis() + timeoutSeconds * 1000L; + while (System.currentTimeMillis() < deadline) { + DdSpan span = findSpanByResource(server.getAllSpans(), resource); + if (span != null) { + return span; + } + Thread.sleep(200); + } + throw new RuntimeException( + "timed out waiting for DD span with resource='%s' (received %d spans: %s)" + .formatted(resource, server.getAllSpans().size(), server.getAllSpans())); + } + + private static void assertNotNull(Object object, String msg) { + assertTrue(object != null, msg); + } + + private static void assertTrue(boolean condition, String failMessage) { + if (!condition) { + throw new RuntimeException(failMessage); + } + } + + private static void assertBtAgentDetection() { + if (BT_AGENT_EXPECTED != null) { + if (BT_AGENT_EXPECTED && !BT_AGENT_ENABLED) { + throw new RuntimeException( + "BT_AGENT_EXPECTED=true but agent was NOT detected." + + " Detection logic may be broken."); + } + if (!BT_AGENT_EXPECTED && BT_AGENT_ENABLED) { + throw new RuntimeException( + "BT_AGENT_EXPECTED=false but agent WAS detected." + + " Agent may be leaking onto the classpath."); + } + System.out.println( + "[smoke-test] BT agent detection: OK (expected=%s, detected=%s)" + .formatted(BT_AGENT_EXPECTED, BT_AGENT_ENABLED)); + } + } + + private static Boolean parseBooleanEnv(String name) { + String value = System.getenv(name); + return value != null ? Boolean.parseBoolean(value) : null; + } + + private static boolean isBraintrustAgentPresent() { + try { + Class.forName( + "dev.braintrust.system.AgentBootstrap", + false, + ClassLoader.getSystemClassLoader()); + return true; + } catch (Exception e) { + return false; + } + } +} diff --git a/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/DdNoOtelSmokeTest.java b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/DdNoOtelSmokeTest.java new file mode 100644 index 00000000..ec69dcee --- /dev/null +++ b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/DdNoOtelSmokeTest.java @@ -0,0 +1,390 @@ +package dev.braintrust.smoketest.ddagent; + +import datadog.trace.api.Trace; +import dev.braintrust.smoketest.ddagent.MockBraintrustBackend.OtlpSpan; +import dev.braintrust.smoketest.ddagent.MockDdAgentServer.DdSpan; +import io.opentelemetry.api.GlobalOpenTelemetry; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.trace.Span; +import io.opentelemetry.api.trace.SpanKind; +import io.opentelemetry.api.trace.Tracer; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Smoke test that runs with the Datadog agent (WITHOUT OTel compatibility mode) and the Braintrust + * agent. Verifies that: + * + *

    + *
  1. DD-native spans ({@code @Trace}) are sent to the DD backend only + *
  2. OTel API spans are sent to the Braintrust backend only + *
  3. Neither system receives the other's spans + *
+ */ +public class DdNoOtelSmokeTest { + + static final int MOCK_DD_AGENT_PORT = + Integer.getInteger("braintrust.smoketest.dd.mock.port", 18126); + + static final int MOCK_BT_BACKEND_PORT = + Integer.getInteger("braintrust.smoketest.bt.mock.port", 18127); + + /** True if the Braintrust agent is attached. */ + static final boolean BT_AGENT_ENABLED = isBraintrustAgentPresent(); + + /** If set, asserts that BT agent detection matches the expected value. */ + static final Boolean BT_AGENT_EXPECTED = parseBooleanEnv("BT_AGENT_EXPECTED"); + + /** DD @Trace span resource names — used to verify isolation. */ + private static final String DD_ROOT_RESOURCE = "dd-traced-root"; + + private static final String DD_CHILD_RESOURCE = "dd-traced-child"; + + /** OTel span names — used to verify isolation. */ + private static final String OTEL_ROOT_NAME = "otel-root-span"; + + private static final String OTEL_CHILD_NAME = "otel-child-span"; + + public static void main(String[] args) throws Exception { + assertBtAgentDetection(); + + System.out.println( + "[no-otel-smoke-test] Braintrust agent: " + + (BT_AGENT_ENABLED + ? "ENABLED" + : "NOT DETECTED — skipping braintrust assertions")); + + var mockDdServer = new MockDdAgentServer(1, MOCK_DD_AGENT_PORT); + mockDdServer.start(); + + var mockBtBackend = new MockBraintrustBackend(0, MOCK_BT_BACKEND_PORT); + mockBtBackend.start(); + + try { + assertOtelShimNotInstalled(); + + // Create DD-native spans via @Trace + exerciseTracedMethods(); + + // Create OTel API spans + exerciseOtelSpans(); + + // Wait for DD to flush + boolean ddReceived = mockDdServer.awaitTraces(30, TimeUnit.SECONDS); + assertTrue(ddReceived, "timed out waiting for DD trace payload on mock server"); + + var ddSpans = mockDdServer.getAllSpans(); + System.out.println( + "[no-otel-smoke-test] DD mock received %d span(s)".formatted(ddSpans.size())); + + // Assert DD got the @Trace spans + assertDdTracedSpans(ddSpans); + + // Assert DD did NOT get the OTel spans + assertDdDoesNotHaveOtelSpans(ddSpans); + + if (BT_AGENT_ENABLED) { + // Poll until the expected OTel spans arrive (the BT exporter batches + // and may send multiple requests, so a simple latch count isn't reliable). + List btSpans = pollForSpan(mockBtBackend, OTEL_ROOT_NAME, 10); + + System.out.println( + "[no-otel-smoke-test] BT mock received %d span(s)" + .formatted(btSpans.size())); + + // Assert BT got the OTel spans + assertBtOtelSpans(btSpans); + + // Assert BT did NOT get the DD @Trace spans + assertBtDoesNotHaveDdSpans(btSpans); + } else { + System.out.println( + "[no-otel-smoke-test] BT backend received %d trace request(s) (no assertions)" + .formatted(mockBtBackend.traceRequestCount())); + } + + System.out.println("=== No-OTel smoke test passed ==="); + } finally { + mockDdServer.stop(); + mockBtBackend.stop(); + } + } + + /** + * Verify that without {@code dd.trace.otel.enabled=true}, the DD OTel shim is NOT installed. + */ + private static void assertOtelShimNotInstalled() { + OpenTelemetry otel = GlobalOpenTelemetry.get(); + String tpClass = otel.getTracerProvider().getClass().getName(); + if (tpClass.contains("datadog")) { + throw new RuntimeException( + "DD OTel shim should NOT be installed in no-otel mode, but TracerProvider is: " + + tpClass); + } + System.out.println( + "[no-otel-smoke-test] OTel shim not installed: OK (TracerProvider=%s)" + .formatted(tpClass)); + } + + // ── DD @Trace-annotated methods ──────────────────────────────────────────── + + private static void exerciseTracedMethods() { + tracedRootOperation(); + } + + @Trace(operationName = "root.operation", resourceName = DD_ROOT_RESOURCE) + private static void tracedRootOperation() { + tracedChildOperation(); + } + + @Trace(operationName = "child.operation", resourceName = DD_CHILD_RESOURCE) + private static void tracedChildOperation() { + try { + Thread.sleep(5); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + // ── OTel API spans ───────────────────────────────────────────────────────── + + private static void exerciseOtelSpans() { + OpenTelemetry otel = GlobalOpenTelemetry.get(); + Tracer tracer = otel.getTracer("braintrust-no-otel-smoke-test"); + + Span root = + tracer.spanBuilder(OTEL_ROOT_NAME) + .setSpanKind(SpanKind.INTERNAL) + .setAttribute("test.source", "no-otel-smoke-test") + .startSpan(); + try (var ignored = root.makeCurrent()) { + Span child = + tracer.spanBuilder(OTEL_CHILD_NAME) + .setSpanKind(SpanKind.CLIENT) + .setAttribute("test.child", true) + .startSpan(); + try (var ignored2 = child.makeCurrent()) { + try { + Thread.sleep(5); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } finally { + child.end(); + } + } finally { + root.end(); + } + } + + // ── DD assertions ────────────────────────────────────────────────────────── + + private static void assertDdTracedSpans(List ddSpans) { + var errors = new ArrayList(); + + DdSpan root = findDdSpanByResource(ddSpans, DD_ROOT_RESOURCE); + DdSpan child = findDdSpanByResource(ddSpans, DD_CHILD_RESOURCE); + + if (root == null) errors.add("missing DD span with resource='" + DD_ROOT_RESOURCE + "'"); + if (child == null) errors.add("missing DD span with resource='" + DD_CHILD_RESOURCE + "'"); + if (!errors.isEmpty()) { + throw new RuntimeException( + "DD @Trace span assertions:\n " + + String.join("\n ", errors) + + "\n available: " + + ddSpans); + } + + if (!"root.operation".equals(root.name)) { + errors.add( + "root operation: expected='root.operation' actual='%s'".formatted(root.name)); + } + if (!"child.operation".equals(child.name)) { + errors.add( + "child operation: expected='child.operation' actual='%s'" + .formatted(child.name)); + } + if (!"bt-dd-smoke-test".equals(root.service)) { + errors.add( + "root service: expected='bt-dd-smoke-test' actual='%s'" + .formatted(root.service)); + } + if (child.traceId != root.traceId) { + errors.add("traceId mismatch: root=%d child=%d".formatted(root.traceId, child.traceId)); + } + if (child.parentId != root.spanId) { + errors.add( + "child parentId: expected=%d actual=%d".formatted(root.spanId, child.parentId)); + } + if (root.duration <= 0) { + errors.add("root duration <= 0: %d".formatted(root.duration)); + } + if (child.duration <= 0) { + errors.add("child duration <= 0: %d".formatted(child.duration)); + } + + if (!errors.isEmpty()) { + throw new RuntimeException( + "DD @Trace span assertions:\n " + String.join("\n ", errors)); + } + System.out.println("[no-otel-smoke-test] DD @Trace spans: OK"); + } + + /** Verify DD mock did NOT receive any OTel-originated spans. */ + private static void assertDdDoesNotHaveOtelSpans(List ddSpans) { + for (var span : ddSpans) { + if (OTEL_ROOT_NAME.equals(span.resource) || OTEL_CHILD_NAME.equals(span.resource)) { + throw new RuntimeException( + "DD mock should NOT contain OTel span, but found: " + span); + } + // Also check the name field (DD operation name) + if (OTEL_ROOT_NAME.equals(span.name) || OTEL_CHILD_NAME.equals(span.name)) { + throw new RuntimeException( + "DD mock should NOT contain OTel span, but found: " + span); + } + } + System.out.println("[no-otel-smoke-test] DD isolation (no OTel spans leaked to DD): OK"); + } + + // ── Braintrust assertions ────────────────────────────────────────────────── + + private static void assertBtOtelSpans(List btSpans) { + var errors = new ArrayList(); + + OtlpSpan root = findOtlpSpanByName(btSpans, OTEL_ROOT_NAME); + OtlpSpan child = findOtlpSpanByName(btSpans, OTEL_CHILD_NAME); + + if (root == null) errors.add("missing BT span with name='" + OTEL_ROOT_NAME + "'"); + if (child == null) errors.add("missing BT span with name='" + OTEL_CHILD_NAME + "'"); + if (!errors.isEmpty()) { + throw new RuntimeException( + "BT OTel span assertions:\n " + + String.join("\n ", errors) + + "\n available: " + + btSpans); + } + + if (!"no-otel-smoke-test".equals(root.stringAttr("test.source"))) { + errors.add( + "root test.source: expected='no-otel-smoke-test' actual='%s'" + .formatted(root.stringAttr("test.source"))); + } + if (!Boolean.TRUE.equals(child.boolAttr("test.child"))) { + errors.add( + "child test.child: expected=true actual=%s" + .formatted(child.boolAttr("test.child"))); + } + if (!child.traceId.equals(root.traceId)) { + errors.add("traceId mismatch: root=%s child=%s".formatted(root.traceId, child.traceId)); + } + if (!child.parentSpanId.equals(root.spanId)) { + errors.add( + "child parentSpanId: expected=%s actual=%s" + .formatted(root.spanId, child.parentSpanId)); + } + if (root.startTimeUnixNano <= 0) { + errors.add("root startTime <= 0"); + } + if (child.startTimeUnixNano <= 0) { + errors.add("child startTime <= 0"); + } + + if (!errors.isEmpty()) { + throw new RuntimeException( + "BT OTel span assertions:\n " + String.join("\n ", errors)); + } + System.out.println("[no-otel-smoke-test] BT OTel spans: OK"); + } + + /** Verify Braintrust mock did NOT receive any DD @Trace-originated spans. */ + private static void assertBtDoesNotHaveDdSpans(List btSpans) { + for (var span : btSpans) { + if (DD_ROOT_RESOURCE.equals(span.name) || DD_CHILD_RESOURCE.equals(span.name)) { + throw new RuntimeException( + "BT mock should NOT contain DD @Trace span, but found: " + span); + } + } + System.out.println("[no-otel-smoke-test] BT isolation (no DD spans leaked to BT): OK"); + } + + // ── Helpers ───────────────────────────────────────────────────────────────── + + private static DdSpan findDdSpanByResource(List spans, String resource) { + for (var span : spans) { + if (resource.equals(span.resource)) { + return span; + } + } + return null; + } + + private static OtlpSpan findOtlpSpanByName(List spans, String name) { + for (var span : spans) { + if (name.equals(span.name)) { + return span; + } + } + return null; + } + + /** + * Polls the BT backend until a span with the given name appears, or timeout. Returns all spans + * collected at that point. + */ + private static List pollForSpan( + MockBraintrustBackend backend, String spanName, int timeoutSeconds) throws Exception { + long deadline = System.currentTimeMillis() + timeoutSeconds * 1000L; + while (System.currentTimeMillis() < deadline) { + var spans = backend.getAllSpans(); + if (findOtlpSpanByName(spans, spanName) != null) { + return spans; + } + Thread.sleep(200); + } + throw new RuntimeException( + "timed out waiting for BT span '%s' (received %d spans so far: %s)" + .formatted(spanName, backend.getAllSpans().size(), backend.getAllSpans())); + } + + private static void assertTrue(boolean condition, String failMessage) { + if (!condition) { + throw new RuntimeException(failMessage); + } + } + + private static void assertBtAgentDetection() { + if (BT_AGENT_EXPECTED != null) { + if (BT_AGENT_EXPECTED && !BT_AGENT_ENABLED) { + throw new RuntimeException( + "BT_AGENT_EXPECTED=true but agent was NOT detected." + + " Detection logic may be broken."); + } + if (!BT_AGENT_EXPECTED && BT_AGENT_ENABLED) { + throw new RuntimeException( + "BT_AGENT_EXPECTED=false but agent WAS detected." + + " Agent may be leaking onto the classpath."); + } + System.out.println( + "[no-otel-smoke-test] BT agent detection: OK (expected=%s, detected=%s)" + .formatted(BT_AGENT_EXPECTED, BT_AGENT_ENABLED)); + } + } + + private static Boolean parseBooleanEnv(String name) { + String value = System.getenv(name); + return value != null ? Boolean.parseBoolean(value) : null; + } + + private static boolean isBraintrustAgentPresent() { + try { + Class.forName( + "dev.braintrust.system.AgentBootstrap", + false, + ClassLoader.getSystemClassLoader()); + return true; + } catch (Exception e) { + return false; + } + } +} diff --git a/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/MockBraintrustBackend.java b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/MockBraintrustBackend.java new file mode 100644 index 00000000..6ca3eced --- /dev/null +++ b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/MockBraintrustBackend.java @@ -0,0 +1,272 @@ +package dev.braintrust.smoketest.ddagent; + +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpServer; +import io.opentelemetry.proto.collector.trace.v1.ExportTraceServiceRequest; +import io.opentelemetry.proto.common.v1.AnyValue; +import io.opentelemetry.proto.common.v1.KeyValue; +import io.opentelemetry.proto.trace.v1.ResourceSpans; +import io.opentelemetry.proto.trace.v1.ScopeSpans; +import io.opentelemetry.proto.trace.v1.Span; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +/** + * A mock Braintrust OTLP backend that captures trace export requests sent to {@code POST + * /otel/v1/traces}. + * + *

The Braintrust agent exports spans via HTTP/Protobuf (OtlpHttpSpanExporter). This server + * accepts those requests, parses the protobuf payloads, and stores decoded spans for assertion. + */ +public class MockBraintrustBackend { + + /** A decoded OTLP span with its attributes. */ + public static class OtlpSpan { + public final String name; + public final String traceId; + public final String spanId; + public final String parentSpanId; + public final Span.SpanKind kind; + public final long startTimeUnixNano; + public final long endTimeUnixNano; + public final Map attributes; + public final int statusCode; + public final String statusMessage; + + OtlpSpan(Span protoSpan) { + this.name = protoSpan.getName(); + this.traceId = hexString(protoSpan.getTraceId().toByteArray()); + this.spanId = hexString(protoSpan.getSpanId().toByteArray()); + this.parentSpanId = hexString(protoSpan.getParentSpanId().toByteArray()); + this.kind = protoSpan.getKind(); + this.startTimeUnixNano = protoSpan.getStartTimeUnixNano(); + this.endTimeUnixNano = protoSpan.getEndTimeUnixNano(); + this.attributes = extractAttributes(protoSpan.getAttributesList()); + this.statusCode = protoSpan.getStatus().getCodeValue(); + this.statusMessage = protoSpan.getStatus().getMessage(); + } + + /** Get a string attribute value, or null if not present. */ + public String stringAttr(String key) { + Object v = attributes.get(key); + return v instanceof String s ? s : null; + } + + /** Get a boolean attribute value, or null if not present. */ + public Boolean boolAttr(String key) { + Object v = attributes.get(key); + return v instanceof Boolean b ? b : null; + } + + /** Get a long attribute value, or null if not present. */ + public Long longAttr(String key) { + Object v = attributes.get(key); + return v instanceof Long l ? l : null; + } + + @Override + public String toString() { + return "OtlpSpan{name='%s', traceId=%s, spanId=%s, parentSpanId=%s, kind=%s, attrs=%s}" + .formatted(name, traceId, spanId, parentSpanId, kind, attributes.keySet()); + } + + private static Map extractAttributes(List kvList) { + Map map = new HashMap<>(kvList.size()); + for (KeyValue kv : kvList) { + AnyValue v = kv.getValue(); + Object value = + switch (v.getValueCase()) { + case STRING_VALUE -> v.getStringValue(); + case BOOL_VALUE -> v.getBoolValue(); + case INT_VALUE -> v.getIntValue(); + case DOUBLE_VALUE -> v.getDoubleValue(); + default -> v.toString(); + }; + map.put(kv.getKey(), value); + } + return map; + } + + private static String hexString(byte[] bytes) { + if (bytes == null || bytes.length == 0) return ""; + var sb = new StringBuilder(bytes.length * 2); + for (byte b : bytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } + } + + /** A captured OTLP trace export request with parsed spans. */ + public static class CapturedRequest { + public final byte[] body; + public final Map> headers; + public final List spans; + + CapturedRequest(byte[] body, Map> headers, List spans) { + this.body = body; + this.headers = headers; + this.spans = spans; + } + + /** Returns the value of the x-bt-parent header, or null if not present. */ + public String btParent() { + var values = headers.get("X-bt-parent"); + if (values == null) values = headers.get("x-bt-parent"); + return values != null && !values.isEmpty() ? values.get(0) : null; + } + + /** Returns the Authorization header value, or null if not present. */ + public String authorization() { + var values = headers.get("Authorization"); + if (values == null) values = headers.get("authorization"); + return values != null && !values.isEmpty() ? values.get(0) : null; + } + } + + private final CopyOnWriteArrayList traceRequests = + new CopyOnWriteArrayList<>(); + private final CopyOnWriteArrayList logRequests = new CopyOnWriteArrayList<>(); + private final CountDownLatch traceLatch; + private final HttpServer server; + private final int port; + + /** + * @param expectedTraceRequestCount how many trace export requests to wait for before unblocking + * {@link #awaitTraces}. + */ + public MockBraintrustBackend(int expectedTraceRequestCount, int port) throws IOException { + this.traceLatch = new CountDownLatch(expectedTraceRequestCount); + this.server = HttpServer.create(new InetSocketAddress("127.0.0.1", port), 0); + this.port = server.getAddress().getPort(); + + server.createContext("/otel/v1/traces", this::handleTraces); + server.createContext("/otel/v1/logs", this::handleLogs); + server.createContext("/", this::handleDefault); + } + + public void start() { + server.start(); + System.out.println("[mock-bt-backend] Listening on port " + port); + } + + public void stop() { + server.stop(0); + } + + public int getPort() { + return port; + } + + /** Block until the expected number of trace export requests have been received, or timeout. */ + public boolean awaitTraces(long timeout, TimeUnit unit) throws InterruptedException { + return traceLatch.await(timeout, unit); + } + + /** Returns all captured trace export requests. */ + public List getTraceRequests() { + return Collections.unmodifiableList(traceRequests); + } + + /** Returns all captured log export requests. */ + public List getLogRequests() { + return Collections.unmodifiableList(logRequests); + } + + /** Total number of trace export requests received. */ + public int traceRequestCount() { + return traceRequests.size(); + } + + /** Total number of log export requests received. */ + public int logRequestCount() { + return logRequests.size(); + } + + /** Returns a flattened list of all spans across all trace requests. */ + public List getAllSpans() { + List all = new ArrayList<>(); + for (var req : traceRequests) { + all.addAll(req.spans); + } + return all; + } + + /** Find a span by name across all received trace requests. */ + public OtlpSpan findSpanByName(String name) { + for (var req : traceRequests) { + for (var span : req.spans) { + if (name.equals(span.name)) { + return span; + } + } + } + return null; + } + + private void handleTraces(HttpExchange exchange) throws IOException { + if (!"POST".equalsIgnoreCase(exchange.getRequestMethod())) { + exchange.sendResponseHeaders(405, -1); + exchange.close(); + return; + } + byte[] body = exchange.getRequestBody().readAllBytes(); + List spans = new ArrayList<>(); + try { + ExportTraceServiceRequest request = ExportTraceServiceRequest.parseFrom(body); + for (ResourceSpans rs : request.getResourceSpansList()) { + for (ScopeSpans ss : rs.getScopeSpansList()) { + for (Span span : ss.getSpansList()) { + spans.add(new OtlpSpan(span)); + } + } + } + } catch (Exception e) { + System.err.println("[mock-bt-backend] Failed to parse protobuf: " + e.getMessage()); + } + var captured = new CapturedRequest(body, exchange.getRequestHeaders(), spans); + traceRequests.add(captured); + traceLatch.countDown(); + + System.out.println( + "[mock-bt-backend] Received trace export (%d bytes, %d spans, x-bt-parent=%s)" + .formatted(body.length, spans.size(), captured.btParent())); + + // OTLP expects a 200 with an empty ExportTraceServiceResponse protobuf. + exchange.sendResponseHeaders(200, 0); + exchange.getResponseBody().close(); + } + + private void handleLogs(HttpExchange exchange) throws IOException { + if (!"POST".equalsIgnoreCase(exchange.getRequestMethod())) { + exchange.sendResponseHeaders(405, -1); + exchange.close(); + return; + } + byte[] body = exchange.getRequestBody().readAllBytes(); + var captured = new CapturedRequest(body, exchange.getRequestHeaders(), List.of()); + logRequests.add(captured); + + System.out.println( + "[mock-bt-backend] Received log export (%d bytes)".formatted(body.length)); + + exchange.sendResponseHeaders(200, 0); + exchange.getResponseBody().close(); + } + + private void handleDefault(HttpExchange exchange) throws IOException { + System.out.println( + "[mock-bt-backend] Unexpected request: %s %s" + .formatted(exchange.getRequestMethod(), exchange.getRequestURI())); + exchange.sendResponseHeaders(404, -1); + exchange.close(); + } +} diff --git a/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/MockDdAgentServer.java b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/MockDdAgentServer.java new file mode 100644 index 00000000..556df29e --- /dev/null +++ b/braintrust-java-agent/smoke-test/dd-agent/src/main/java/dev/braintrust/smoketest/ddagent/MockDdAgentServer.java @@ -0,0 +1,240 @@ +package dev.braintrust.smoketest.ddagent; + +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpServer; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import org.msgpack.core.MessageFormat; +import org.msgpack.core.MessagePack; +import org.msgpack.core.MessageUnpacker; + +/** + * A mock DD Agent HTTP server that captures trace payloads sent to {@code PUT /v0.4/traces}. + * + *

The DD Java agent sends traces as msgpack-encoded arrays of arrays of span maps. This server + * parses those payloads and stores the decoded spans for assertion. + */ +public class MockDdAgentServer { + + /** A decoded DD span (subset of fields we care about). */ + public static class DdSpan { + public final String name; + public final String service; + public final String resource; + public final long traceId; + public final long spanId; + public final long parentId; + public final long start; // nanoseconds + public final long duration; // nanoseconds + public final int error; + public final String type; + public final Map meta; + + DdSpan( + String name, + String service, + String resource, + long traceId, + long spanId, + long parentId, + long start, + long duration, + int error, + String type, + Map meta) { + this.name = name; + this.service = service; + this.resource = resource; + this.traceId = traceId; + this.spanId = spanId; + this.parentId = parentId; + this.start = start; + this.duration = duration; + this.error = error; + this.type = type; + this.meta = meta; + } + + @Override + public String toString() { + return "DdSpan{name='%s', service='%s', resource='%s', traceId=%d, spanId=%d, parentId=%d, error=%d}" + .formatted(name, service, resource, traceId, spanId, parentId, error); + } + } + + /** One trace = a list of spans sharing the same trace ID. */ + private final CopyOnWriteArrayList> receivedTraces = new CopyOnWriteArrayList<>(); + + private final CountDownLatch traceLatch; + private final HttpServer server; + private final int port; + + /** + * @param expectedTraceCount how many trace payloads to wait for before unblocking {@link + * #awaitTraces}. + */ + public MockDdAgentServer(int expectedTraceCount, int port) throws IOException { + this.traceLatch = new CountDownLatch(expectedTraceCount); + this.server = HttpServer.create(new InetSocketAddress("127.0.0.1", port), 0); + this.port = server.getAddress().getPort(); + + // DD agent sends PUT /v0.3/traces or /v0.4/traces or /v0.5/traces + server.createContext("/", this::handleRequest); + } + + public void start() { + server.start(); + System.out.println("[mock-dd-agent] Listening on port " + port); + } + + public void stop() { + server.stop(0); + } + + public int getPort() { + return port; + } + + /** Block until the expected number of trace batches have been received, or timeout. */ + public boolean awaitTraces(long timeout, TimeUnit unit) throws InterruptedException { + return traceLatch.await(timeout, unit); + } + + /** Returns all received traces (each trace is a list of spans). */ + public List> getReceivedTraces() { + return Collections.unmodifiableList(receivedTraces); + } + + /** Flattened list of all received spans across all traces. */ + public List getAllSpans() { + List all = new ArrayList<>(); + for (var trace : receivedTraces) { + all.addAll(trace); + } + return all; + } + + private void handleRequest(HttpExchange exchange) throws IOException { + String method = exchange.getRequestMethod(); + String path = exchange.getRequestURI().getPath(); + + // DD agent sends PUT /v0.X/traces + if ("PUT".equalsIgnoreCase(method) && path.matches("/v0\\.\\d+/traces")) { + byte[] body = exchange.getRequestBody().readAllBytes(); + try { + List> traces = decodeMsgpack(body); + for (var trace : traces) { + receivedTraces.add(trace); + traceLatch.countDown(); + } + System.out.println( + "[mock-dd-agent] Received %d trace(s) on %s" + .formatted(traces.size(), path)); + } catch (Exception e) { + System.err.println( + "[mock-dd-agent] Failed to decode msgpack on %s (%d bytes): %s" + .formatted(path, body.length, e.getMessage())); + e.printStackTrace(); + } + // Respond 200 with a rates response (DD agent expects JSON with rate_by_service) + byte[] resp = "{\"rate_by_service\":{}}".getBytes(); + exchange.getResponseHeaders().set("Content-Type", "application/json"); + exchange.sendResponseHeaders(200, resp.length); + exchange.getResponseBody().write(resp); + exchange.getResponseBody().close(); + } else { + // Other endpoints the DD agent may probe (e.g., /info, /v0.7/config) + byte[] resp = "{}".getBytes(); + exchange.getResponseHeaders().set("Content-Type", "application/json"); + exchange.sendResponseHeaders(200, resp.length); + exchange.getResponseBody().write(resp); + exchange.getResponseBody().close(); + } + } + + /** + * Decodes the DD msgpack trace payload. Format: array of traces, where each trace is an array + * of spans, and each span is a map. + */ + static List> decodeMsgpack(byte[] data) throws IOException { + List> traces = new ArrayList<>(); + try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(data)) { + int numTraces = unpacker.unpackArrayHeader(); + for (int t = 0; t < numTraces; t++) { + int numSpans = unpacker.unpackArrayHeader(); + List trace = new ArrayList<>(numSpans); + for (int s = 0; s < numSpans; s++) { + trace.add(decodeSpan(unpacker)); + } + traces.add(trace); + } + } + return traces; + } + + private static DdSpan decodeSpan(MessageUnpacker unpacker) throws IOException { + int mapSize = unpacker.unpackMapHeader(); + String name = ""; + String service = ""; + String resource = ""; + long traceId = 0; + long spanId = 0; + long parentId = 0; + long start = 0; + long duration = 0; + int error = 0; + String type = ""; + Map meta = Map.of(); + + for (int i = 0; i < mapSize; i++) { + String key = unpacker.unpackString(); + if (unpacker.tryUnpackNil()) { + // Value is nil — skip it, keep defaults. + continue; + } + switch (key) { + case "name" -> name = unpacker.unpackString(); + case "service" -> service = unpacker.unpackString(); + case "resource" -> resource = unpacker.unpackString(); + case "trace_id" -> traceId = unpacker.unpackBigInteger().longValue(); + case "span_id" -> spanId = unpacker.unpackBigInteger().longValue(); + case "parent_id" -> parentId = unpacker.unpackBigInteger().longValue(); + case "start" -> start = unpacker.unpackLong(); + case "duration" -> duration = unpacker.unpackLong(); + case "error" -> error = unpacker.unpackInt(); + case "type" -> type = unpacker.unpackString(); + case "meta" -> meta = unpackStringMap(unpacker); + default -> unpacker.skipValue(); + } + } + return new DdSpan( + name, service, resource, traceId, spanId, parentId, start, duration, error, type, + meta); + } + + private static Map unpackStringMap(MessageUnpacker unpacker) + throws IOException { + int size = unpacker.unpackMapHeader(); + Map map = new HashMap<>(size); + for (int i = 0; i < size; i++) { + // Keys are always strings, but values might not be + String k = unpacker.unpackString(); + MessageFormat fmt = unpacker.getNextFormat(); + if (fmt.getValueType() == org.msgpack.value.ValueType.STRING) { + map.put(k, unpacker.unpackString()); + } else { + // Skip non-string values (metrics map may have numbers) + unpacker.skipValue(); + } + } + return map; + } +} diff --git a/braintrust-java-agent/src/test/java/dev/braintrust/system/AgentBootstrapTest.java b/braintrust-java-agent/src/test/java/dev/braintrust/system/AgentBootstrapTest.java index 17ec8852..e650c44d 100644 --- a/braintrust-java-agent/src/test/java/dev/braintrust/system/AgentBootstrapTest.java +++ b/braintrust-java-agent/src/test/java/dev/braintrust/system/AgentBootstrapTest.java @@ -126,6 +126,8 @@ void correctClasspath() throws Exception { btLoaderClasses.add(className); } catch (ClassNotFoundException | NoClassDefFoundError e) { if (className.startsWith("dev.braintrust") + && (!className.startsWith( + "dev.braintrust.agent.dd.")) && (!className.startsWith( "dev.braintrust.instrumentation."))) { fail(e); diff --git a/braintrust-sdk/src/main/java/dev/braintrust/trace/BraintrustTracing.java b/braintrust-sdk/src/main/java/dev/braintrust/trace/BraintrustTracing.java index f648bccd..89380463 100644 --- a/braintrust-sdk/src/main/java/dev/braintrust/trace/BraintrustTracing.java +++ b/braintrust-sdk/src/main/java/dev/braintrust/trace/BraintrustTracing.java @@ -5,6 +5,7 @@ import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.baggage.propagation.W3CBaggagePropagator; import io.opentelemetry.api.trace.Tracer; +import io.opentelemetry.api.trace.TracerProvider; import io.opentelemetry.api.trace.propagation.W3CTraceContextPropagator; import io.opentelemetry.context.propagation.ContextPropagators; import io.opentelemetry.context.propagation.TextMapPropagator; @@ -153,7 +154,12 @@ public static Tracer getTracer() { /** Gets a tracer from a specific OpenTelemetry instance. */ public static Tracer getTracer(OpenTelemetry openTelemetry) { - return openTelemetry.getTracer(INSTRUMENTATION_NAME, INSTRUMENTATION_VERSION); + return getTracer(openTelemetry.getTracerProvider()); + } + + /** Gets the braintrust tracer from a tracer provider */ + public static Tracer getTracer(TracerProvider tracerProvider) { + return tracerProvider.get(INSTRUMENTATION_NAME, INSTRUMENTATION_VERSION); } private static String sdkInfoLogMessage() {