diff --git a/src/executor/wall_time/perf/jit_dump.rs b/src/executor/wall_time/perf/jit_dump.rs index f306de08..27385006 100644 --- a/src/executor/wall_time/perf/jit_dump.rs +++ b/src/executor/wall_time/perf/jit_dump.rs @@ -5,7 +5,7 @@ use crate::{ use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord}; use runner_shared::unwind_data::{ProcessUnwindData, UnwindData}; use std::{ - collections::{HashMap, HashSet}, + collections::HashMap, path::{Path, PathBuf}, }; @@ -108,7 +108,10 @@ impl JitDump { } } -/// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data +/// Converts all the `jit-.dump` into a perf-.map with symbols, and collects the unwind data. +/// +/// Jitdump file paths are discovered from MMAP2 records in the perf data, since JIT runtimes +/// mmap the jitdump file and perf records the mapping with the actual path on disk. /// /// # Symbols /// Since a jit dump is by definition specific to a single pid, we append the harvested symbols @@ -118,40 +121,38 @@ impl JitDump { /// Unwind data is generated as a list pub async fn save_symbols_and_harvest_unwind_data_for_pids( profile_folder: &Path, - pids: &HashSet, + jit_dump_paths_by_pid: &HashMap>, ) -> Result>> { - let mut jit_unwind_data_by_path = HashMap::new(); - - for pid in pids { - let name = format!("jit-{pid}.dump"); - let path = PathBuf::from("/tmp").join(&name); + let mut jit_unwind_data_by_pid = HashMap::new(); - if !path.exists() { - continue; - } - debug!("Found JIT dump file: {path:?}"); + for (pid, paths) in jit_dump_paths_by_pid { + for path in paths { + debug!("Found JIT dump file: {path:?}"); - let symbols = match JitDump::new(path.clone()).into_perf_map() { - Ok(symbols) => symbols, - Err(error) => { - warn!("Failed to convert jit dump into perf map: {error:?}"); - continue; - } - }; + let symbols = match JitDump::new(path.clone()).into_perf_map() { + Ok(symbols) => symbols, + Err(error) => { + warn!("Failed to convert jit dump into perf map: {error:?}"); + continue; + } + }; - // Also write to perf-.map for harvested Python perf maps compatibility - symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; + symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; - let jit_unwind_data = match JitDump::new(path).into_unwind_data() { - Ok(data) => data, - Err(error) => { - warn!("Failed to convert jit dump into unwind data: {error:?}"); - continue; - } - }; + let jit_unwind_data = match JitDump::new(path.clone()).into_unwind_data() { + Ok(data) => data, + Err(error) => { + warn!("Failed to convert jit dump into unwind data: {error:?}"); + continue; + } + }; - jit_unwind_data_by_path.insert(*pid, jit_unwind_data); + jit_unwind_data_by_pid + .entry(*pid) + .or_insert_with(Vec::new) + .extend(jit_unwind_data); + } } - Ok(jit_unwind_data_by_path) + Ok(jit_unwind_data_by_pid) } diff --git a/src/executor/wall_time/perf/mod.rs b/src/executor/wall_time/perf/mod.rs index c854d49a..ab8674ae 100644 --- a/src/executor/wall_time/perf/mod.rs +++ b/src/executor/wall_time/perf/mod.rs @@ -300,6 +300,7 @@ impl BenchmarkData { let MemmapRecordsOutput { loaded_modules_by_path, tracked_pids, + jit_dump_paths_by_pid, } = { parse_perf_file::parse_for_memmap2(perf_file_path, pid_filter).map_err(|e| { error!("Failed to parse perf file: {e}"); @@ -317,13 +318,15 @@ impl BenchmarkData { error!("Failed to harvest perf maps: {e}"); BenchmarkDataSaveError::FailedToHarvestPerfMaps })?; - let jit_unwind_data_by_pid = - jit_dump::save_symbols_and_harvest_unwind_data_for_pids(path_ref, &tracked_pids) - .await - .map_err(|e| { - error!("Failed to harvest jit dumps: {e}"); - BenchmarkDataSaveError::FailedToHarvestJitDumps - })?; + let jit_unwind_data_by_pid = jit_dump::save_symbols_and_harvest_unwind_data_for_pids( + path_ref, + &jit_dump_paths_by_pid, + ) + .await + .map_err(|e| { + error!("Failed to harvest jit dumps: {e}"); + BenchmarkDataSaveError::FailedToHarvestJitDumps + })?; let artifacts = save_artifacts::save_artifacts( path_ref, diff --git a/src/executor/wall_time/perf/parse_perf_file.rs b/src/executor/wall_time/perf/parse_perf_file.rs index 9f5b0fd8..2e0195a1 100644 --- a/src/executor/wall_time/perf/parse_perf_file.rs +++ b/src/executor/wall_time/perf/parse_perf_file.rs @@ -42,6 +42,8 @@ pub struct MemmapRecordsOutput { /// Module symbols and the computed load bias for each pid that maps the ELF path. pub loaded_modules_by_path: HashMap, pub tracked_pids: HashSet, + /// Jitdump file paths discovered from MMAP2 records, keyed by PID. + pub jit_dump_paths_by_pid: HashMap>, } /// Parse the perf file at `perf_file_path` and look for MMAP2 records for the given `pids`. @@ -53,6 +55,7 @@ pub fn parse_for_memmap2>( mut pid_filter: PidFilter, ) -> Result { let mut loaded_modules_by_path = HashMap::::new(); + let mut jit_dump_paths_by_pid = HashMap::>::new(); // 1MiB buffer let reader = std::io::BufReader::with_capacity( @@ -105,6 +108,22 @@ pub fn parse_for_memmap2>( continue; } + // Collect jitdump file paths before the PROT_EXEC filter in process_mmap2_record + // skips them. JIT runtimes mmap the jitdump file so perf records it. + // Match perf's jit_detect(): basename must be `jit-.dump`. + if is_jit_dump_path(&mmap2_record.path.as_slice(), mmap2_record.pid) { + let path = PathBuf::from( + String::from_utf8_lossy(&mmap2_record.path.as_slice()).into_owned(), + ); + if path.exists() { + debug!("Found jitdump path from MMAP2 record: {path:?}"); + jit_dump_paths_by_pid + .entry(mmap2_record.pid) + .or_default() + .push(path); + } + } + process_mmap2_record(mmap2_record, &mut loaded_modules_by_path); } _ => continue, @@ -123,6 +142,7 @@ pub fn parse_for_memmap2>( Ok(MemmapRecordsOutput { loaded_modules_by_path, tracked_pids, + jit_dump_paths_by_pid, }) } @@ -160,6 +180,17 @@ impl PidFilter { } } +/// Returns true if the path basename matches perf's `jit_detect()` pattern: `jit-.dump`, +/// where `` must match the MMAP2 record's PID. +fn is_jit_dump_path(path: &[u8], pid: pid_t) -> bool { + let Some(pos) = path.iter().rposition(|&b| b == b'/') else { + return false; + }; + let basename = &path[pos + 1..]; + let expected = format!("jit-{pid}.dump"); + basename == expected.as_bytes() +} + /// Process a single MMAP2 record and add it to the symbols and unwind data maps fn process_mmap2_record( record: linux_perf_data::linux_perf_event_reader::Mmap2Record,