diff --git a/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt index 3a44ed1e5..0003f9798 100644 --- a/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt +++ b/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt @@ -18,6 +18,7 @@ package com.code_intelligence.jazzer.agent import com.code_intelligence.jazzer.instrumentor.ClassInstrumentor import com.code_intelligence.jazzer.instrumentor.CoverageRecorder +import com.code_intelligence.jazzer.instrumentor.EdgeLocationData import com.code_intelligence.jazzer.instrumentor.Hook import com.code_intelligence.jazzer.instrumentor.InstrumentationType import com.code_intelligence.jazzer.utils.ClassNameGlobber @@ -246,14 +247,15 @@ class RuntimeInstrumentor( // or there will be additional coverage points injected if any calls are inserted // and JaCoCo will produce a broken coverage report. coverageIdSynchronizer.withIdForClass(internalClassName) { firstId -> - coverage(firstId).also { actualNumEdgeIds -> - CoverageRecorder.recordInstrumentedClass( - internalClassName, - bytecode, - firstId, - actualNumEdgeIds, - ) - } + val result = coverage(firstId) + CoverageRecorder.recordInstrumentedClass( + internalClassName, + bytecode, + firstId, + result.numEdges, + ) + registerSourceLocations(result.locations, firstId) + result.numEdges } // Hook instrumentation must be performed after data flow tracing as the injected // bytecode would trigger the GEP callbacks for byte[]. 
@@ -265,4 +267,30 @@ class RuntimeInstrumentor(
             instrumentedBytecode
         }
     }
+
+    /**
+     * Forwards the source locations collected for one class to the native symbolizer.
+     *
+     * @param locations per-edge location data, or null, in which case nothing is registered
+     * @param firstEdgeId global ID of the first coverage edge of the class
+     */
+    private fun registerSourceLocations(
+        locations: EdgeLocationData?,
+        firstEdgeId: Int,
+    ) {
+        if (locations == null) return
+        try {
+            com.code_intelligence.jazzer.runtime.SourceLocationRegistry.registerLocations(
+                locations.sourceFile,
+                locations.methodNames,
+                firstEdgeId,
+                locations.edgeData,
+            )
+        } catch (_: LinkageError) {
+            // Native library not loaded (e.g. standalone instrumentation).
+            // Only the first failed call throws UnsatisfiedLinkError; once the static
+            // initializer of SourceLocationRegistry has failed, every later call throws
+            // NoClassDefFoundError instead, so catch their common supertype.
+        }
+    }
 }
diff --git a/src/main/java/com/code_intelligence/jazzer/instrumentor/ClassInstrumentor.kt b/src/main/java/com/code_intelligence/jazzer/instrumentor/ClassInstrumentor.kt
index 51617ab0f..f68de627e 100644
--- a/src/main/java/com/code_intelligence/jazzer/instrumentor/ClassInstrumentor.kt
+++ b/src/main/java/com/code_intelligence/jazzer/instrumentor/ClassInstrumentor.kt
@@ -21,6 +21,11 @@ import com.code_intelligence.jazzer.runtime.CoverageMap
 fun extractClassFileMajorVersion(classfileBuffer: ByteArray): Int =
     ((classfileBuffer[6].toInt() and 0xff) shl 8) or (classfileBuffer[7].toInt() and 0xff)
 
+data class CoverageResult(
+    val numEdges: Int,
+    val locations: EdgeLocationData?,
+)
+
 class ClassInstrumentor(
     private val internalClassName: String,
     bytecode: ByteArray,
@@ -28,7 +33,7 @@ class ClassInstrumentor(
     var instrumentedBytecode = bytecode
         private set
 
-    fun coverage(initialEdgeId: Int): Int {
+    fun coverage(initialEdgeId: Int): CoverageResult {
         val edgeCoverageInstrumentor =
             EdgeCoverageInstrumentor(
                 defaultEdgeCoverageStrategy,
@@ -36,7 +41,7 @@ class ClassInstrumentor(
                 initialEdgeId,
             )
         instrumentedBytecode = edgeCoverageInstrumentor.instrument(internalClassName, instrumentedBytecode)
-        return edgeCoverageInstrumentor.numEdges
+        return CoverageResult(edgeCoverageInstrumentor.numEdges, edgeCoverageInstrumentor.buildEdgeLocations())
     }
 
     fun traceDataFlow(instrumentations: Set) {
diff --git a/src/main/java/com/code_intelligence/jazzer/instrumentor/EdgeCoverageInstrumentor.kt
b/src/main/java/com/code_intelligence/jazzer/instrumentor/EdgeCoverageInstrumentor.kt index 8fc9c33c3..7e94c7387 100644 --- a/src/main/java/com/code_intelligence/jazzer/instrumentor/EdgeCoverageInstrumentor.kt +++ b/src/main/java/com/code_intelligence/jazzer/instrumentor/EdgeCoverageInstrumentor.kt @@ -30,6 +30,7 @@ import com.code_intelligence.jazzer.third_party.org.jacoco.core.internal.instr.P import org.objectweb.asm.ClassReader import org.objectweb.asm.ClassVisitor import org.objectweb.asm.ClassWriter +import org.objectweb.asm.Label import org.objectweb.asm.MethodVisitor import java.lang.invoke.MethodHandle import java.lang.invoke.MethodHandles.publicLookup @@ -82,6 +83,22 @@ interface EdgeCoverageStrategy { val loadLocalVariableStackSize: Int } +/** + * Per-edge source location data collected during instrumentation. + * + * @param sourceFile Qualified source path, e.g. "com/example/Foo.java" + * @param methodNames Deduplicated method name table (SimpleClassName.method) + * @param edgeData Flat array: [packedLine0, methodIdx0, packedLine1, methodIdx1, ...] + * The sign bit of each packedLine encodes isFunctionEntry. + */ +class EdgeLocationData( + val sourceFile: String, + val methodNames: Array, + val edgeData: IntArray, +) { + override fun toString(): String = "EdgeLocationData(sourceFile=$sourceFile, methods=${methodNames.size}, edges=${edgeData.size / 2})" +} + // An instance of EdgeCoverageInstrumentor should only be used to instrument a single class as it // internally tracks the edge IDs, which have to be globally unique. 
class EdgeCoverageInstrumentor( @@ -106,10 +123,38 @@ class EdgeCoverageInstrumentor( ), ) + // ── Source location tracking ────────────────────────────────── + private var sourceFile: String = "" + private var simpleClassName: String = "" + private val methodNameList = mutableListOf() + private val methodNameIndex = mutableMapOf() + private val locationData = mutableListOf() + + private fun internMethodName(name: String): Int = + methodNameIndex.getOrPut(name) { + methodNameList.add(name) + methodNameList.size - 1 + } + + /** Returns the collected edge locations, or null if no edges were instrumented. */ + fun buildEdgeLocations(): EdgeLocationData? { + if (numEdges == 0) return null + return EdgeLocationData( + sourceFile, + methodNameList.toTypedArray(), + locationData.toIntArray(), + ) + } + override fun instrument( internalClassName: String, bytecode: ByteArray, ): ByteArray { + val lastSlash = internalClassName.lastIndexOf('/') + simpleClassName = if (lastSlash >= 0) internalClassName.substring(lastSlash + 1) else internalClassName + // Fallback source file if the class has no SourceFile attribute. + sourceFile = internalClassName + val reader = InstrSupport.classReaderFor(bytecode) val writer = ClassWriter(reader, 0) val version = InstrSupport.getMajorVersion(reader) @@ -144,6 +189,8 @@ class EdgeCoverageInstrumentor( /** * A [ProbeInserter] that injects bytecode instrumentation at every control flow edge and * modifies the stack size and number of local variables accordingly. + * + * Also records the source location of each probe for PC symbolization. 
*/ private inner class EdgeCoverageProbeInserter( access: Int, @@ -152,7 +199,25 @@ class EdgeCoverageInstrumentor( mv: MethodVisitor, arrayStrategy: IProbeArrayStrategy, ) : ProbeInserter(access, name, desc, mv, arrayStrategy) { + private var currentLine = 0 + private var isFirstProbe = true + private val methodIdx = internMethodName("$simpleClassName.$name") + + override fun visitLineNumber( + line: Int, + start: Label, + ) { + currentLine = line + super.visitLineNumber(line, start) + } + override fun insertProbe(id: Int) { + // Pack isFuncEntry into the sign bit of the line number. + val packedLine = if (isFirstProbe) (currentLine or (1 shl 31)) else currentLine + locationData.add(packedLine) + locationData.add(methodIdx) + isFirstProbe = false + strategy.instrumentControlFlowEdge(mv, id, variable, coverageMapInternalClassName) } @@ -182,6 +247,20 @@ class EdgeCoverageInstrumentor( ) : ClassProbesAdapter(cpv, trackFrames) { override fun nextId(): Int = nextEdgeId() + override fun visitSource( + source: String?, + debug: String?, + ) { + if (source != null) { + // sourceFile was initialized to the internal class name (e.g. "com/example/Outer$Inner") + // in instrument(). Extract the package prefix and prepend it to the declared source + // file name, giving e.g. "com/example/Outer.java". 
+ val packageEnd = sourceFile.lastIndexOf('/') + sourceFile = if (packageEnd >= 0) sourceFile.substring(0, packageEnd + 1) + source else source + } + super.visitSource(source, debug) + } + override fun visitEnd() { cpv.visitTotalProbeCount(numEdges) // Avoid calling super.visitEnd() as that invokes cpv.visitTotalProbeCount with an diff --git a/src/main/java/com/code_intelligence/jazzer/runtime/BUILD.bazel b/src/main/java/com/code_intelligence/jazzer/runtime/BUILD.bazel index dcd90af08..7f125c6d3 100644 --- a/src/main/java/com/code_intelligence/jazzer/runtime/BUILD.bazel +++ b/src/main/java/com/code_intelligence/jazzer/runtime/BUILD.bazel @@ -161,6 +161,15 @@ java_jni_library( ], ) +java_jni_library( + name = "source_location_registry", + srcs = ["SourceLocationRegistry.java"], + visibility = [ + "//src/main/java/com/code_intelligence/jazzer/instrumentor:__pkg__", + "//src/main/native/com/code_intelligence/jazzer/driver:__pkg__", + ], +) + java_library( name = "runtime", srcs = [ @@ -188,6 +197,7 @@ java_library( ":constants", ":coverage_map", ":extra_counters_tracker", + ":source_location_registry", ":trace_data_flow_native_callbacks", "//src/main/java/com/code_intelligence/jazzer/api:hooks", ], diff --git a/src/main/java/com/code_intelligence/jazzer/runtime/SourceLocationRegistry.java b/src/main/java/com/code_intelligence/jazzer/runtime/SourceLocationRegistry.java new file mode 100644 index 000000000..77d7c124b --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/runtime/SourceLocationRegistry.java @@ -0,0 +1,46 @@ +/* + * Copyright 2026 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.runtime; + +import com.github.fmeum.rules_jni.RulesJni; + +/** + * Registers per-edge source location metadata with the native symbolizer so that libFuzzer's {@code + * -print_pcs}, {@code -print_funcs}, and {@code -print_coverage} flags show Java source locations + * instead of meaningless hex addresses. + * + *

Called from the instrumentor after each class is instrumented. Thread-safe on the native side. + */ +public final class SourceLocationRegistry { + static { + RulesJni.loadLibrary("jazzer_driver", "/com/code_intelligence/jazzer/driver"); + } + + private SourceLocationRegistry() {} + + /** + * Register source locations for a contiguous range of coverage edge IDs. + * + * @param sourceFile Qualified source path (e.g. "com/example/Foo.java") + * @param methodNames Deduplicated method name table for this class + * @param firstEdgeId Global ID of the first edge in this class + * @param edgeData Flat array: [packedLine0, methodIdx0, packedLine1, methodIdx1, ...]. The sign + * bit of each packedLine encodes whether the edge is a function entry point. + */ + public static native void registerLocations( + String sourceFile, String[] methodNames, int firstEdgeId, int[] edgeData); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel index 25e10d882..c76a05a33 100644 --- a/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel +++ b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel @@ -30,6 +30,7 @@ cc_library( ":jazzer_fuzzer_callbacks", ":libfuzzer_callbacks", ":mutator", + ":synthetic_symbolizer", ], ) @@ -56,6 +57,18 @@ cc_library( alwayslink = True, ) +cc_library( + name = "synthetic_symbolizer", + srcs = ["synthetic_symbolizer.cpp"], + hdrs = ["synthetic_symbolizer.h"], + deps = [ + ":counters_tracker", + "//src/main/java/com/code_intelligence/jazzer/runtime:source_location_registry.hdrs", + ], + # JNI symbols are only referenced dynamically. + alwayslink = True, +) + cc_library( name = "fuzz_target_runner", srcs = ["fuzz_target_runner.cpp"], @@ -156,6 +169,7 @@ cc_library( cc_library( name = "sanitizer_symbols", srcs = ["sanitizer_symbols.cpp"], + deps = [":synthetic_symbolizer"], # Symbols are referenced dynamically by libFuzzer. 
alwayslink = True, ) @@ -175,3 +189,14 @@ cc_test( "@rules_jni//jni", ], ) + +cc_test( + name = "synthetic_symbolizer_test", + size = "small", + srcs = ["synthetic_symbolizer_test.cpp"], + deps = [ + ":synthetic_symbolizer", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) diff --git a/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.cpp b/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.cpp index a43d0fb11..7d6396ab9 100644 --- a/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.cpp +++ b/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.cpp @@ -37,6 +37,15 @@ void AssertNoException(JNIEnv &env) { _Exit(1); } } + +// Tracks a registered PC table batch so we can update PCFlags later. +struct PCTableBatch { + uintptr_t pc_base; + std::size_t count; + jazzer::PCTableEntry *entries; +}; + +std::vector gCoveragePCBatches; } // namespace namespace jazzer { @@ -45,24 +54,27 @@ uint8_t *CountersTracker::coverage_counters_ = nullptr; uint8_t *CountersTracker::extra_counters_ = nullptr; std::mutex CountersTracker::mutex_; -void CountersTracker::RegisterCounterRange(uint8_t *start, uint8_t *end) { +void CountersTracker::RegisterCounterRange(uint8_t *start, uint8_t *end, + uintptr_t pc_base, + bool track_batch) { if (start >= end) { return; } std::size_t num_counters = end - start; - // libFuzzer requires an array containing the instruction addresses associated - // with the coverage counters. Since these may be synthetic counters (not - // associated with real code), we create PC entries with the flag set to 0 to - // indicate they are not real PCs. The PC value is set to the counter index - // for identification purposes. + // libFuzzer pairs each 8-bit counter with a PC table entry. We assign + // globally unique synthetic PCs so the symbolizer can resolve them back + // to Java source locations. 
PCTableEntry *pc_entries = new PCTableEntry[num_counters]; for (std::size_t i = 0; i < num_counters; ++i) { - pc_entries[i] = {i, 0}; + pc_entries[i] = {pc_base + i, 0}; } std::lock_guard lock(mutex_); + if (track_batch) { + gCoveragePCBatches.push_back({pc_base, num_counters, pc_entries}); + } __sanitizer_cov_8bit_counters_init(start, end); __sanitizer_cov_pcs_init( reinterpret_cast(pc_entries), @@ -94,8 +106,12 @@ void CountersTracker::RegisterNewCounters(JNIEnv &env, jint old_num_counters, << std::endl; _Exit(1); } + // Coverage counters use the global edge ID as the PC value and + // track the batch so SetCoveragePCFlags can update entries later. RegisterCounterRange(coverage_counters_ + old_num_counters, - coverage_counters_ + new_num_counters); + coverage_counters_ + new_num_counters, + static_cast(old_num_counters), + /*track_batch=*/true); } void CountersTracker::InitializeExtra(JNIEnv &env, jlong counters) { @@ -123,8 +139,21 @@ void CountersTracker::RegisterExtraCounters(JNIEnv &env, jint start_offset, << std::endl; _Exit(1); } + // Extra counters use a disjoint PC range so the symbolizer can tell them + // apart from coverage counters. 
RegisterCounterRange(extra_counters_ + start_offset, - extra_counters_ + end_offset); + extra_counters_ + end_offset, + kExtraCountersPCBase + start_offset); +} + +void CountersTracker::SetCoveragePCFlags(std::size_t edge_id, uintptr_t flags) { + std::lock_guard lock(mutex_); + for (auto &batch : gCoveragePCBatches) { + if (edge_id >= batch.pc_base && edge_id < batch.pc_base + batch.count) { + batch.entries[edge_id - batch.pc_base].PCFlags |= flags; + return; + } + } } } // namespace jazzer @@ -149,13 +178,22 @@ Java_com_code_1intelligence_jazzer_runtime_CoverageMap_getEverCoveredIds( JNIEnv *env, jclass) { uintptr_t *covered_pcs; jint num_covered_pcs = __sanitizer_cov_get_observed_pcs(&covered_pcs); - std::vector covered_edge_ids(covered_pcs, - covered_pcs + num_covered_pcs); + + // Filter out extra-counter PCs (>= kExtraCountersPCBase) which would + // overflow jint and corrupt Java-side coverage analysis. + std::vector covered_edge_ids; + covered_edge_ids.reserve(num_covered_pcs); + for (jint i = 0; i < num_covered_pcs; ++i) { + if (covered_pcs[i] < jazzer::kExtraCountersPCBase) { + covered_edge_ids.push_back(static_cast(covered_pcs[i])); + } + } delete[] covered_pcs; - jintArray covered_edge_ids_jni = env->NewIntArray(num_covered_pcs); + jint count = static_cast(covered_edge_ids.size()); + jintArray covered_edge_ids_jni = env->NewIntArray(count); AssertNoException(*env); - env->SetIntArrayRegion(covered_edge_ids_jni, 0, num_covered_pcs, + env->SetIntArrayRegion(covered_edge_ids_jni, 0, count, covered_edge_ids.data()); AssertNoException(*env); return covered_edge_ids_jni; diff --git a/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.h b/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.h index 999b7a13d..2d9e3142e 100644 --- a/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.h +++ b/src/main/native/com/code_intelligence/jazzer/driver/counters_tracker.h @@ -26,6 +26,10 @@ struct __attribute__((packed)) 
PCTableEntry { [[maybe_unused]] uintptr_t PC, PCFlags; }; +// Extra counters live in a disjoint PC range so the symbolizer can +// distinguish them from coverage counters (which carry source locations). +constexpr uintptr_t kExtraCountersPCBase = 0x80000000UL; + // CountersTracker manages coverage counter arrays and registers them with // libFuzzer. It handles two separate counter regions: // - Coverage counters: for bytecode edge coverage (used by CoverageMap) @@ -38,7 +42,12 @@ class CountersTracker { static std::mutex mutex_; // Shared helper to register a counter range with libFuzzer. - static void RegisterCounterRange(uint8_t *start, uint8_t *end); + // pc_base is the PC value assigned to the first counter in the range; + // subsequent counters get pc_base+1, pc_base+2, etc. + // If track_batch is true, the PC table entries are recorded so that + // SetCoveragePCFlags can update them later (used for coverage counters). + static void RegisterCounterRange(uint8_t *start, uint8_t *end, + uintptr_t pc_base, bool track_batch = false); public: // For CoverageMap: initialize coverage counters base address. @@ -54,6 +63,10 @@ class CountersTracker { // For ExtraCountersTracker.java: register extra counters with libFuzzer. static void RegisterExtraCounters(JNIEnv &env, jint start_offset, jint end_offset); + + // Set additional flags on the PC table entry for a coverage edge. + // Used by the symbolizer to mark function-entry edges (PCFlags |= 1). 
+ static void SetCoveragePCFlags(std::size_t edge_id, uintptr_t flags); }; } // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp index 4c420f260..f699a31f3 100644 --- a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp +++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "synthetic_symbolizer.h" + // Suppress libFuzzer warnings about missing sanitizer methods in non-sanitizer // builds. extern "C" [[maybe_unused]] int __sanitizer_acquire_crash_state() { return 1; } @@ -24,3 +26,13 @@ void DumpJvmStackTraces(); extern "C" [[maybe_unused]] void __sanitizer_print_stack_trace() { jazzer::DumpJvmStackTraces(); } + +// Override libFuzzer's weak __sanitizer_symbolize_pc so that -print_pcs=1, +// -print_funcs=1, and -print_coverage=1 show Java source locations. +extern "C" [[maybe_unused]] void __sanitizer_symbolize_pc(void *pc, + const char *fmt, + char *out_buf, + size_t out_buf_size) { + jazzer::SymbolizePC(reinterpret_cast(pc), fmt, out_buf, + out_buf_size); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer.cpp b/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer.cpp new file mode 100644 index 000000000..a3d236ad6 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer.cpp @@ -0,0 +1,277 @@ +// Copyright 2026 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Implements PC symbolization for Jazzer's synthetic (fake) coverage PCs. +// +// Java bytecode instrumentation assigns each control-flow edge a globally +// unique integer PC. This module maps those PCs back to Java source +// locations (file, method, line) so that libFuzzer's -print_pcs, +// -print_funcs, and -print_coverage flags produce human-readable output. + +#include "synthetic_symbolizer.h" + +#include + +#include +#include +#include +#include +#include + +#include "counters_tracker.h" + +// The JNI header is auto-generated by rules_jni from +// SourceLocationRegistry.java. +#include "com_code_intelligence_jazzer_runtime_SourceLocationRegistry.h" + +// ── Async-signal-safe spinlock ─────────────────────────────────── +// +// std::mutex is NOT async-signal-safe. If a crash/timeout signal fires +// while registerLocations holds a std::mutex, the signal handler would +// deadlock trying to symbolize the crash. We use std::atomic_flag with +// a bounded spin so the reader (SymbolizePC) either acquires the lock +// quickly or gives up and returns "". + +namespace { + +class SpinLock { + public: + // Used by writers (registerLocations). Spins until acquired. + void lock() { + while (flag_.test_and_set(std::memory_order_acquire)) { +#if defined(__x86_64__) || defined(__i386__) + __builtin_ia32_pause(); +#elif defined(__aarch64__) || defined(__arm__) + asm volatile("yield"); +#endif + } + } + + // Used by readers (SymbolizePC, possibly from signal handlers). + // Spins for at most `max_spins` iterations, then gives up. 
+ bool try_lock(int max_spins = 4096) { + for (int i = 0; i < max_spins; ++i) { + if (!flag_.test_and_set(std::memory_order_acquire)) { + return true; + } + } + return false; + } + + void unlock() { flag_.clear(std::memory_order_release); } + + private: + std::atomic_flag flag_ = ATOMIC_FLAG_INIT; +}; + +// ── Data structures ────────────────────────────────────────────── + +constexpr uint32_t kNoLocation = UINT32_MAX; + +struct PCLocation { + uint32_t file_idx = kNoLocation; + uint32_t func_idx = kNoLocation; + uint32_t line = 0; +}; + +SpinLock gLock; + +// O(1) index by string value; O(1) lookup by index during symbolization. +std::vector gStringTable; +std::unordered_map gStringLookup; + +// Indexed by edge ID. file_idx == kNoLocation means "no data registered". +// line == 0 simply means "no debug info" but file/method are still valid. +std::vector gLocations; + +uint32_t InternString(const std::string &s) { + auto it = gStringLookup.find(s); + if (it != gStringLookup.end()) return it->second; + auto idx = static_cast(gStringTable.size()); + gStringTable.push_back(s); + gStringLookup[s] = idx; + return idx; +} + +// Undo libFuzzer's GetNextInstructionPc before lookup. +uintptr_t ToPCTablePC(uintptr_t symbolizer_pc) { +#if defined(__aarch64__) || defined(__arm__) + return symbolizer_pc - 4; +#elif defined(__mips__) + return symbolizer_pc - 8; +#elif defined(__powerpc__) || defined(__sparc__) + return symbolizer_pc - 4; +#else + return symbolizer_pc - 1; +#endif +} + +} // namespace + +// ── JNI: register source locations for a batch of edges ────────── + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_SourceLocationRegistry_registerLocations( + JNIEnv *env, jclass, jstring source_file_jni, jobjectArray method_names_jni, + jint first_edge_id, jintArray edge_data_jni) { + if (!source_file_jni || !method_names_jni || !edge_data_jni) return; + if (first_edge_id < 0) return; + + // Extract Java strings before acquiring the lock. 
+ const char *source_file_c = env->GetStringUTFChars(source_file_jni, nullptr); + std::string source_file(source_file_c); + env->ReleaseStringUTFChars(source_file_jni, source_file_c); + + jsize num_methods = env->GetArrayLength(method_names_jni); + std::vector method_names(num_methods); + for (jsize i = 0; i < num_methods; ++i) { + auto name_jni = + static_cast(env->GetObjectArrayElement(method_names_jni, i)); + const char *name_c = env->GetStringUTFChars(name_jni, nullptr); + method_names[i] = name_c; + env->ReleaseStringUTFChars(name_jni, name_c); + env->DeleteLocalRef(name_jni); + } + + jsize data_len = env->GetArrayLength(edge_data_jni); + if (data_len < 2) return; + std::vector raw_data(data_len); + env->GetIntArrayRegion(edge_data_jni, 0, data_len, raw_data.data()); + + auto base = static_cast(first_edge_id); + std::size_t num_edges = static_cast(data_len) / 2; + + // Collect function-entry edge IDs so we can update PC flags + // outside the spinlock (avoids nested lock: spinlock -> mutex). + std::vector func_entry_edges; + + gLock.lock(); + + uint32_t file_idx = InternString(source_file); + + std::vector func_indices(num_methods); + for (jsize i = 0; i < num_methods; ++i) { + func_indices[i] = InternString(method_names[i]); + } + + if (base + num_edges > gLocations.size()) { + gLocations.resize(base + num_edges); + } + + for (std::size_t i = 0; i < num_edges; ++i) { + auto packed_line = static_cast(raw_data[2 * i]); + auto method_idx_local = static_cast(raw_data[2 * i + 1]); + + bool is_func_entry = (packed_line & 0x80000000u) != 0; + uint32_t line = packed_line & 0x7FFFFFFFu; + + uint32_t func_idx = method_idx_local < func_indices.size() + ? func_indices[method_idx_local] + : 0; + gLocations[base + i] = {file_idx, func_idx, line}; + + if (is_func_entry) { + func_entry_edges.push_back(base + i); + } + } + + gLock.unlock(); + + // Mark function-entry PC flags outside the spinlock. 
+ for (auto edge_id : func_entry_edges) { + jazzer::CountersTracker::SetCoveragePCFlags(edge_id, 1); + } +} + +// ── SymbolizePC ────────────────────────────────────────────────── + +namespace jazzer { + +void SymbolizePC(uintptr_t pc, const char *fmt, char *out_buf, + size_t out_buf_size) { + if (out_buf_size == 0) return; + + auto orig_pc = ToPCTablePC(pc); + + // Copy strings into stack-locals while the lock is held so we + // don't use dangling c_str() pointers after unlock. + char file_buf[512] = ""; + char func_buf[256] = ""; + uint32_t line = 0; + + // Bounded try-lock: bail out if we can't acquire (crash handler safety). + bool locked = gLock.try_lock(); + if (locked) { + if (orig_pc < gLocations.size() && + gLocations[orig_pc].file_idx != kNoLocation) { + const auto &loc = gLocations[orig_pc]; + snprintf(file_buf, sizeof(file_buf), "%s", + gStringTable[loc.file_idx].c_str()); + snprintf(func_buf, sizeof(func_buf), "%s", + gStringTable[loc.func_idx].c_str()); + line = loc.line; + } + gLock.unlock(); + } + + // Format the output according to libFuzzer's format specifiers. + size_t pos = 0; + // Bytes available for content (reserves one byte for the null terminator). + auto remaining = [&]() -> size_t { + return out_buf_size > pos + 1 ? out_buf_size - pos - 1 : 0; + }; + auto advance = [&](int n) { + if (n > 0) pos += std::min(static_cast(n), remaining()); + }; + + for (const char *f = fmt; *f && remaining() > 0; ++f) { + if (*f == '%' && *(f + 1)) { + ++f; + switch (*f) { + case 'p': + // Virtual PCs are meaningless; eat the trailing space. 
+ if (*(f + 1) == ' ') ++f; + break; + case 'F': + advance(snprintf(out_buf + pos, remaining() + 1, "in %s", func_buf)); + break; + case 'L': + advance(snprintf(out_buf + pos, remaining() + 1, "%s:%u", file_buf, + line)); + break; + case 's': + advance(snprintf(out_buf + pos, remaining() + 1, "%s", file_buf)); + break; + case 'l': + advance(snprintf(out_buf + pos, remaining() + 1, "%u", line)); + break; + case 'c': + // No column info from JVM bytecode; output 0. + advance(snprintf(out_buf + pos, remaining() + 1, "0")); + break; + default: + if (remaining() >= 2) { + out_buf[pos++] = '%'; + out_buf[pos++] = *f; + } + break; + } + } else { + out_buf[pos++] = *f; + } + } + out_buf[pos] = '\0'; +} + +} // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer.h b/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer.h new file mode 100644 index 000000000..96ab4b5d0 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer.h @@ -0,0 +1,31 @@ +// Copyright 2026 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace jazzer { + +// Resolve a synthetic (fake) PC to a human-readable Java source location. +// Called by the __sanitizer_symbolize_pc override in sanitizer_symbols.cpp. 
+// +// Designed for use from crash handlers: it uses a bounded spin-try-lock, so +// it cannot deadlock while registerLocations holds the write lock (output is +// then left unresolved). Not strictly async-signal-safe: it calls snprintf. +void SymbolizePC(uintptr_t pc, const char *fmt, char *out_buf, + size_t out_buf_size); + +} // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer_test.cpp b/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer_test.cpp new file mode 100644 index 000000000..8e208844b --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/synthetic_symbolizer_test.cpp @@ -0,0 +1,71 @@ +// Copyright 2026 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "synthetic_symbolizer.h" + +#include +#include +#include + +#include "gtest/gtest.h" + +// Stubs for libFuzzer symbols pulled in transitively via counters_tracker. +extern "C" { +void __sanitizer_cov_8bit_counters_init(uint8_t *, uint8_t *) {} +void __sanitizer_cov_pcs_init(const uintptr_t *, const uintptr_t *) {} +size_t __sanitizer_cov_get_observed_pcs(uintptr_t **) { return 0; } +} + +// Helper: call SymbolizePC with an unregistered PC and return the result. 
+static std::string Symbolize(uintptr_t pc, const char *fmt, + size_t buf_size = 1024) { + std::string buf(buf_size, '\0'); + jazzer::SymbolizePC(pc, fmt, buf.data(), buf_size); + return {buf.c_str()}; +} + +// The default libFuzzer format for DescribePC is "%p %F %L". +// With no registered locations, we should get clean fallback. +TEST(SyntheticSymbolizerTest, UnregisteredPCProducesUnknownFallback) { + auto result = Symbolize(42, "%p %F %L"); + // %p should be eaten (virtual PCs are meaningless), leaving "%F %L". + EXPECT_NE(std::string::npos, result.find("in ")); + EXPECT_NE(std::string::npos, result.find(":0")); + // No hex address should appear (the %p was consumed). + EXPECT_EQ(std::string::npos, result.find("0x")); +} + +// A small buffer should truncate without crashing. +TEST(SyntheticSymbolizerTest, SmallBufferTruncatesSafely) { + char tiny[8] = {}; + jazzer::SymbolizePC(42, "%F %L", tiny, sizeof(tiny)); + // Must be null-terminated and not overflow. + EXPECT_LT(strlen(tiny), sizeof(tiny)); + + // Zero-size buffer is a no-op. + char zero = 'X'; + jazzer::SymbolizePC(42, "%F", &zero, 0); + EXPECT_EQ('X', zero); +} + +// Verify individual format specifiers produce the right fallback shape. +TEST(SyntheticSymbolizerTest, FormatSpecifiers) { + EXPECT_EQ("in ", Symbolize(42, "%F")); + EXPECT_EQ(":0", Symbolize(42, "%L")); + EXPECT_EQ("", Symbolize(42, "%s")); + EXPECT_EQ("0", Symbolize(42, "%l")); + EXPECT_EQ("0", Symbolize(42, "%c")); + // Literal text passes through. 
+ EXPECT_EQ("hello", Symbolize(42, "hello")); +} diff --git a/src/test/java/com/code_intelligence/jazzer/instrumentor/BUILD.bazel b/src/test/java/com/code_intelligence/jazzer/instrumentor/BUILD.bazel index 7d0cf08ac..37e74a4ce 100644 --- a/src/test/java/com/code_intelligence/jazzer/instrumentor/BUILD.bazel +++ b/src/test/java/com/code_intelligence/jazzer/instrumentor/BUILD.bazel @@ -50,6 +50,26 @@ wrapped_kt_jvm_test( ], ) +wrapped_kt_jvm_test( + name = "edge_location_capture_test", + size = "small", + srcs = [ + "CoverageInstrumentationTarget.java", + "EdgeLocationCaptureTest.kt", + "MockCoverageMap.java", + ], + associates = [ + "//src/main/java/com/code_intelligence/jazzer/instrumentor:instrumentor", + ], + test_class = "com.code_intelligence.jazzer.instrumentor.EdgeLocationCaptureTest", + deps = [ + ":patch_test_utils", + "//src/main/java/com/code_intelligence/jazzer/runtime:coverage_map", + "@maven//:junit_junit", + "@rules_kotlin//kotlin/compiler:kotlin-test", + ], +) + wrapped_kt_jvm_test( name = "descriptor_utils_test", size = "small", diff --git a/src/test/java/com/code_intelligence/jazzer/instrumentor/EdgeLocationCaptureTest.kt b/src/test/java/com/code_intelligence/jazzer/instrumentor/EdgeLocationCaptureTest.kt new file mode 100644 index 000000000..d5c2c262e --- /dev/null +++ b/src/test/java/com/code_intelligence/jazzer/instrumentor/EdgeLocationCaptureTest.kt @@ -0,0 +1,78 @@ +/* + * Copyright 2026 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.instrumentor + +import com.code_intelligence.jazzer.instrumentor.PatchTestUtils.classToBytecode +import org.junit.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull +import kotlin.test.assertTrue + +class EdgeLocationCaptureTest { + @Test + fun testEdgeLocationCapture() { + val internalClassName = CoverageInstrumentationTarget::class.java.name.replace('.', '/') + val instrumentor = + EdgeCoverageInstrumentor( + ClassInstrumentor.defaultEdgeCoverageStrategy, + MockCoverageMap::class.java, + 0, + ) + instrumentor.instrument(internalClassName, classToBytecode(CoverageInstrumentationTarget::class.java)) + + val locations = instrumentor.buildEdgeLocations() + assertNotNull(locations, "Expected non-null locations for a class with edges") + + // Source file should combine the package prefix with the SourceFile attribute. + assertEquals( + "com/code_intelligence/jazzer/instrumentor/CoverageInstrumentationTarget.java", + locations.sourceFile, + ) + + // Method names should be qualified as SimpleClassName.method. + assertTrue( + locations.methodNames.any { it == "CoverageInstrumentationTarget." }, + "Expected constructor in method names, got: ${locations.methodNames.toList()}", + ) + assertTrue( + locations.methodNames.any { it == "CoverageInstrumentationTarget.selfCheck" }, + "Expected selfCheck in method names, got: ${locations.methodNames.toList()}", + ) + + // Flat array must have exactly 2 ints per edge (packedLine, methodIdx). + assertEquals(instrumentor.numEdges * 2, locations.edgeData.size) + + // First edge should have the function-entry bit set (sign bit). + val firstPackedLine = locations.edgeData[0] + assertTrue(firstPackedLine < 0, "First edge should have function-entry bit (sign bit) set") + + // Its actual line number should be positive (class was compiled with debug info). 
+ val firstLine = firstPackedLine and 0x7FFFFFFF + assertTrue(firstLine > 0, "Line number should be > 0 for a class with debug info") + + // Second edge of the same method should NOT have the function-entry bit set. + // Find the second edge that shares the same methodIdx as the first. + val firstMethodIdx = locations.edgeData[1] + for (i in 1 until instrumentor.numEdges) { + if (locations.edgeData[2 * i + 1] == firstMethodIdx) { + val subsequentPackedLine = locations.edgeData[2 * i] + assertTrue(subsequentPackedLine >= 0, "Subsequent edge should not have function-entry bit") + break + } + } + } +} diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index bd857bfd2..9902afb3d 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -451,6 +451,30 @@ java_binary( srcs = ["src/test/java/com/example/CrashResistantCoverageTarget.java"], ) +java_binary( + name = "PrintPcsTarget", + testonly = True, + srcs = ["src/test/java/com/example/PrintPcsTarget.java"], + create_executable = False, + deps = ["//deploy:jazzer-api"], +) + +sh_test( + name = "print_pcs_symbolization_test", + size = "large", + srcs = ["src/test/shell/print_pcs_symbolization_test.sh"], + args = [ + "$(rlocationpath //launcher:jazzer)", + "$(rlocationpath :PrintPcsTarget_deploy.jar)", + ], + data = [ + ":PrintPcsTarget_deploy.jar", + "//launcher:jazzer", + ], + target_compatible_with = LINUX_ONLY, + deps = ["@bazel_tools//tools/bash/runfiles"], +) + sh_test( name = "crash_resistant_coverage_test", srcs = ["src/test/shell/crash_resistant_coverage_test.sh"], diff --git a/tests/src/test/java/com/example/PrintPcsTarget.java b/tests/src/test/java/com/example/PrintPcsTarget.java new file mode 100644 index 000000000..c9ac05845 --- /dev/null +++ b/tests/src/test/java/com/example/PrintPcsTarget.java @@ -0,0 +1,31 @@ +/* + * Copyright 2026 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example; + +import com.code_intelligence.jazzer.api.FuzzedDataProvider; + +/** Minimal fuzz target used to verify -print_pcs symbolization. */ +public class PrintPcsTarget { + public static void fuzzerTestOneInput(FuzzedDataProvider data) { + int x = data.consumeInt(); + if (x > 1000) { + sink(x); + } + } + + private static void sink(int x) {} +} diff --git a/tests/src/test/shell/print_pcs_symbolization_test.sh b/tests/src/test/shell/print_pcs_symbolization_test.sh new file mode 100755 index 000000000..4c7e502e9 --- /dev/null +++ b/tests/src/test/shell/print_pcs_symbolization_test.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# +# Copyright 2026 Code Intelligence GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Verify that -print_pcs=1 produces symbolized Java source locations +# instead of meaningless hex addresses. 
# --- begin runfiles.bash initialization v2 ---
set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
  source "$0.runfiles/$f" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
  { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v2 ---

# Print "FAILED: <message>" and terminate the test with exit status 1.
#   $1 - human-readable reason for the failure.
function fail() {
  echo "FAILED: $1"
  exit 1
}

# Positional arguments:
#   $1 - rlocation path of the Jazzer launcher.
#   $2 - rlocation path of the fuzz target deploy jar.
JAZZER="$(rlocation "$1")"
TARGET_JAR="$(rlocation "$2")"
output="$TEST_TMPDIR/output"

# A short burst of runs is enough to discover the target's coverage edges.
# The launcher's exit status is deliberately ignored (`|| true`); only the
# captured stdout/stderr is inspected below.
"$JAZZER" --cp="$TARGET_JAR" --target_class=com.example.PrintPcsTarget \
  --instrumentation_includes="com.example.**" \
  -print_pcs=1 -runs=10 2>&1 | tee "$output" || true

# Verify at least one NEW_PC line has a symbolized Java source location:
#   "in <Class>.<method> <File>.java:<line>"
# An extended regex with POSIX character classes is used instead of a PCRE
# (`grep -P`), so the check does not depend on grep being built with PCRE
# support; an unsupported -P would otherwise surface as a bogus test failure.
if ! grep -qE 'NEW_PC:.*in [^[:space:]]+\.[^[:space:]]+ [^[:space:]]+\.java:[0-9]+' "$output"; then
  echo "Output was:"
  cat "$output"
  fail "Expected symbolized NEW_PC lines with Java source locations"
fi