Skip to content

Commit d6fa72f

Browse files
findstrfindstr                                                                                                                                                                              email = findstr@sina.com
authored and committed
Support multi-version Lua (5.3/5.4/5.5) with dynamic detection
Major changes: - Add dynamic Lua version detection via DWARF debug info - Split lua.h into version-specific headers (lua_5_3_6.h, lua_5_4_0.h, lua_5_5_0.h) - Build separate BPF skeletons for each Lua version - Use DWARF to precisely locate the L variable position Bug fixes: - Fix tsslen macro in lua_5_4_0.h: use shrlen != 0xFF check instead of tt field - Fix lua_get_file in lua_5_5_0.h: use strisshr() for short string detection - Fix build.rs typo: cargo:cargo -> cargo:rerun-if-changed - Add missing rerun-if-changed for new lua header files Other changes: - Update dependencies to latest versions - Change default sample frequency from 1000Hz to 100Hz - Bump version to 0.2
1 parent 12a38f4 commit d6fa72f

18 files changed

Lines changed: 2822 additions & 465 deletions

File tree

.github/workflows/rust.yml

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,52 @@ env:
1111

1212
jobs:
1313
build:
14-
15-
runs-on: ubuntu-latest
14+
runs-on: ubuntu-24.04
1615

1716
steps:
18-
- uses: actions/checkout@v3
17+
- uses: actions/checkout@v4
18+
19+
- name: Install dependencies
20+
run: |
21+
sudo apt-get update
22+
sudo apt-get install -y \
23+
clang \
24+
llvm \
25+
libelf-dev \
26+
linux-tools-common \
27+
linux-tools-generic \
28+
libbpf-dev \
29+
linux-headers-$(uname -r)
30+
31+
- name: Setup bpftool
32+
run: |
33+
# Link bpftool from linux-tools
34+
sudo ln -sf /usr/lib/linux-tools/*/bpftool /usr/local/bin/bpftool || true
35+
bpftool version
36+
37+
- name: Generate vmlinux.h if BTF not available
38+
run: |
39+
# Check if BTF is available on the runner
40+
if [ ! -f /sys/kernel/btf/vmlinux ]; then
41+
echo "BTF not available, downloading pre-generated vmlinux.h..."
42+
mkdir -p src/bpf
43+
# Use vmlinux.h from libbpf-bootstrap (covers common kernel structures)
44+
curl -sL https://raw.githubusercontent.com/libbpf/libbpf-bootstrap/master/vmlinux/vmlinux_608.h -o src/bpf/vmlinux.h
45+
echo "Downloaded vmlinux.h"
46+
else
47+
echo "BTF available at /sys/kernel/btf/vmlinux"
48+
fi
49+
50+
- name: Setup Rust
51+
uses: dtolnay/rust-toolchain@stable
52+
1953
- name: Build
20-
run: cargo build --verbose
54+
run: |
55+
# If vmlinux.h was pre-generated, skip btf_dump in build.rs
56+
if [ -f src/bpf/vmlinux.h ]; then
57+
export SKIP_BTF_DUMP=1
58+
fi
59+
cargo build --verbose
60+
2161
- name: Run tests
2262
run: cargo test --verbose

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ Cargo.lock
1313
# MSVC Windows builds of rustc generate these, which store debugging information
1414
*.pdb
1515
src/bpf/vmlinux.h
16+
lua/

Cargo.toml

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "lua-perf"
3-
version = "0.1.0"
3+
version = "0.2.0"
44
edition = "2021"
55
keywords = ["lua", "perf", "lua-perf"]
66
build = "build.rs"
@@ -20,30 +20,28 @@ exclude = [
2020

2121
[dependencies]
2222
libc = "0.2.147"
23-
procfs = "0.16.0-RC1"
24-
procmaps = "0.4.1"
25-
memmap2 = "0.9.0"
26-
gimli = "0.28.0"
23+
procfs = "0.18.0"
24+
memmap2 = "0.9.9"
25+
gimli = "0.32"
2726
rustc-demangle = "0.1.21"
28-
regex = "1.6"
27+
regex = "1.11"
2928
iced-x86 = "1.17"
3029
byteorder = "1.4.3"
3130
anyhow = "1.0"
32-
libbpf-rs="0.21.2"
31+
libbpf-rs="0.25.0"
3332
plain = "0.2"
34-
nix = "0.27.1"
33+
nix = "0.30.1"
3534
tracing = "0.1"
3635
tracing-subscriber = {version = "0.3", features = ["ansi", "env-filter", "fmt"]}
37-
blazesym = "=0.2.0-alpha.6"
38-
goblin = { version = "0.7.1", features = ["elf64"] }
36+
blazesym = "=0.2.1"
37+
goblin = { version = "0.10.4", features = ["elf64"] }
3938
clap = { version = "4.4.6", features = ["derive", "env"] }
4039
clap_derive = "4.4.2"
4140
time = { version = "0.3", features = ["formatting", "local-offset", "macros"]}
42-
psutil = "3.2.2"
4341
lazy_static = "1.4.0"
4442
ctrlc = "3.4.1"
45-
capstone = "0.11.0"
43+
capstone = "0.14.0"
4644

4745
[build-dependencies]
48-
libbpf-cargo = "0.21.2"
46+
libbpf-cargo = "0.25.0"
4947

README.en.md

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
# lua-perf
22
[![cn](https://img.shields.io/badge/lang-cn-red.svg)](./README.md)
33

4-
`lua-perf` is a performance profiling tool implemented based on `eBPF`, currently supporting `Lua 5.4`.
4+
`lua-perf` is a performance profiling tool implemented based on `eBPF`, supporting `Lua 5.3`, `Lua 5.4`, and `Lua 5.5`.
55

66
## Features
77

88
- Provides performance analysis for mixed `C` and `Lua` code, as well as pure `C` code.
99
- Uses stack sampling technique with minimal performance impact on the target process, making it suitable for production environments.
1010
- Performs stack backtracing in the kernel space using `eh-frame`, eliminating the need for the target process to use the `-fno-omit-frame-pointer` option to preserve stack frame pointers.
11+
- Automatically detects the Lua version of the target process, no manual specification required.
12+
- Precisely locates the `L` variable position via DWARF debug information, supporting GCC/Clang O0~O3 optimization levels.
1113

1214
## Requirements
1315

@@ -19,7 +21,7 @@ To use `lua-perf`, make sure you meet the following requirements:
1921

2022
To generate flame graphs, you need to use `lua-perf` in conjunction with the [FlameGraph](https://github.com/brendangregg/FlameGraph.git) tool. Here's how you can do it:
2123

22-
1. First, run the command `sudo lua-perf -p <pid> -f <HZ>` to sample the call stacks of the target process and generate a `perf.fold` file in the current directory. `<pid>` is the process ID of the target process, which can be a process inside a Docker container or a process on the host machine. `<HZ>` is the stack sampling frequency, with a default value of `1000` (1000 samples per second).
24+
1. First, run the command `sudo lua-perf -p <pid> -f <HZ>` to sample the call stacks of the target process and generate a `perf.fold` file in the current directory. `<pid>` is the process ID of the target process, which can be a process inside a Docker container or a process on the host machine. `<HZ>` is the stack sampling frequency, with a default value of `100` (100 samples per second).
2325

2426
2. Next, convert the `perf.fold` file to a flame graph by running `./FlameGraph/flamegraph.pl perf.fold > perf.svg`.
2527

@@ -35,15 +37,13 @@ In the BPF program, bpf_trace_printk is used to print logs. If you suspect any a
3537
```
3638
sudo mount -t tracefs nodev /sys/kernel/tracing
3739
sudo cat /sys/kernel/tracing/trace_pipe
38-
These commands will help you access the logs and view them. If you have any further questions, feel free to ask.
3940
```
4041

4142
## Known Issues
4243

4344
`lua-perf` currently has the following known issues:
4445

4546
- Lack of support for `CFA_expression`, which may result in failed stack backtracing in extreme cases.
46-
- When analyzing Lua stacks, the search for the `L` pointer is currently done by assuming it is stored in register `rbx`, which is correct for most cases with `GCC -O2`. However, depending on the optimization level of GCC, the value of `L` may be stored in a different register, leading to failures in Lua stack analysis.
4747
- The analysis of `CFA` instructions does not handle `vdso` at the moment, causing stack backtracing failures for function calls in `vdso`.
4848
- The process of merging C stacks and Lua stacks uses a heuristic strategy, which may have some flaws in extreme cases (none have been found so far).
4949

@@ -53,7 +53,5 @@ The following tasks are planned for `lua-perf`:
5353

5454
- Support for `CFA_expression`
5555
- Support for `vdso`
56-
- Dynamic analysis of the `L` register
5756
- Optimization of the merging strategy for C stacks and Lua stacks
58-
- Support for more versions of Lua
5957

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
# lua-perf
22
[![en](https://img.shields.io/badge/lang-en-red.svg)](./README.en.md)
33

4-
`lua-perf`是一个基于`eBPF`实现的性能分析工具,目前仅支持`Lua 5.4`
4+
`lua-perf`是一个基于`eBPF`实现的性能分析工具,支持`Lua 5.3`、`Lua 5.4`、`Lua 5.5`
55

66
## 功能
77

88
- 提供对`C`和`Lua`混合代码的性能分析,同时也支持纯`C`代码。
99
- 采用栈采样技术,并且对目标进程的性能影响非常小,可以在生产环境中使用。
1010
- 通过使用`eh-frame`在内核空间进行栈回溯,不要求目标进程使用`-fno-omit-frame-pointer`选项来保留栈帧指针。
11+
- 自动检测目标进程的Lua版本,无需手动指定。
12+
- 通过DWARF调试信息精确定位`L`变量位置,支持GCC/Clang O0~O3优化级别。
1113

1214
## 执行要求
1315

@@ -19,7 +21,7 @@
1921

2022
要生成火焰图,您需要使用`lua-perf`配合[FlameGraph](https://github.com/brendangregg/FlameGraph.git)工具进行操作。以下是步骤:
2123

22-
1. 首先,使用命令 `sudo lua-perf -p <pid> -f <HZ>` 对目标进程进行栈采样,并在当前目录下生成 `perf.fold` 文件。其中 `<pid>` 是目标进程的进程ID,可以是Docker内的进程或者宿主机上的进程。`<HZ>` 是栈的采样频率,默认为 `1000`(即每秒采样1000次)。
24+
1. 首先,使用命令 `sudo lua-perf -p <pid> -f <HZ>` 对目标进程进行栈采样,并在当前目录下生成 `perf.fold` 文件。其中 `<pid>` 是目标进程的进程ID,可以是Docker内的进程或者宿主机上的进程。`<HZ>` 是栈的采样频率,默认为 `100`(即每秒采样100次)。
2325

2426
2. 然后,使用命令 `./FlameGraph/flamegraph.pl perf.fold > perf.svg` 将 `perf.fold` 文件转换成火焰图。
2527

@@ -43,7 +45,6 @@ sudo cat /sys/kernel/debug/tracing/trace_pipe
4345
`lua-perf`目前存在以下已知问题:
4446

4547
- 尚不支持`CFA_expression`,在某些极端情况下可能会导致调用栈回溯失败。
46-
- 在分析Lua栈时,动态分析`L`寄存器(目前仅支持gcc/clang O0~03)
4748
- 在分析`CFA`指令时,暂时没有处理 `vdso`,因此在 `vdso` 中的函数调用会导致栈回溯失败。
4849
- 在合并进程的C栈和Lua栈时,采用了启发式的合并策略,极端情况下可能存在一些瑕疵(目前尚未发现)。
4950

@@ -54,4 +55,3 @@ sudo cat /sys/kernel/debug/tracing/trace_pipe
5455
- 支持`CFA_expression`
5556
- 支持`vdso`
5657
- 优化C栈和Lua栈的合并策略
57-
- 支持更多版本的Lua

build.rs

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ use libbpf_cargo::SkeletonBuilder;
1010
const BPF_SRC: &str = concat!("src/bpf/", "profile.bpf.c");
1111

1212
fn btf_dump() {
13+
// Check if we should skip BTF dump (e.g., vmlinux.h already exists in src/bpf/)
14+
if env::var("SKIP_BTF_DUMP").is_ok() {
15+
println!("cargo:warning=Skipping BTF dump, using existing vmlinux.h");
16+
return;
17+
}
18+
1319
let mut out = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script"));
1420
out.push("vmlinux.h");
1521
let path = out.to_str().unwrap();
@@ -20,28 +26,53 @@ fn btf_dump() {
2026
.spawn()
2127
.expect("failed to generate vmlinux.h");
2228
cmd.wait().expect("fail to generate vmlinux.h");
23-
println!("cargo:cargo rerun-if-not-exists={}", path);
29+
println!("cargo:rerun-if-changed={}", path);
2430
}
2531

2632
fn output() ->String {
2733
let out_dir = env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script");
28-
let out_dir_str = out_dir.to_str().unwrap().clone();
34+
let out_dir_str = out_dir.to_str().unwrap();
2935
String::from_str(out_dir_str).unwrap()
3036
}
3137
fn build_skel() {
32-
let mut out = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script"));
33-
out.push("profile.skel.rs");
34-
let include = format!("-I{}", output());
38+
let out_dir = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script"));
39+
40+
// When SKIP_BTF_DUMP is set, vmlinux.h is in src/bpf/ instead of OUT_DIR
41+
let include_out = format!("-I{}", output());
42+
let include_src_bpf = "-Isrc/bpf".to_string();
43+
44+
// Define versions: (suffix, clang_flags)
45+
// Suffix "" means default filename "profile.skel.rs" (Lua 5.4.0 default)
46+
let versions = vec![
47+
("_5_5_0", vec!["-DLUA_VERSION_5_5_0"]),
48+
("_5_4_0", vec!["-DLUA_VERSION_5_4_0"]),
49+
("_5_3_6", vec!["-DLUA_VERSION_5_3_6"]),
50+
];
51+
52+
for (suffix, flags) in versions {
53+
let mut out = out_dir.clone();
54+
out.push(format!("profile{}.skel.rs", suffix));
55+
56+
// Include both OUT_DIR (for normal builds) and src/bpf (for pre-generated vmlinux.h)
57+
let mut args = vec![include_out.clone(), include_src_bpf.clone()];
58+
for f in flags {
59+
args.push(f.to_string());
60+
}
61+
3562
SkeletonBuilder::new()
36-
.debug(true)
37-
.clang_args(include)
63+
.clang_args(args)
3864
.source(BPF_SRC)
3965
.build_and_generate(&out)
4066
.unwrap();
41-
println!("cargo:rerun-if-changed={BPF_SRC}");
42-
println!("cargo:rerun-if-changed=src/bpf/lstate.h");
43-
println!("cargo:rerun-if-changed=src/bpf/profile.h");
44-
println!("cargo:rerun-if-changed=src/bpf/hash.h");
67+
}
68+
69+
println!("cargo:rerun-if-changed={BPF_SRC}");
70+
println!("cargo:rerun-if-changed=src/bpf/profile.h");
71+
println!("cargo:rerun-if-changed=src/bpf/hash.h");
72+
println!("cargo:rerun-if-changed=src/bpf/lua.h");
73+
println!("cargo:rerun-if-changed=src/bpf/lua_5_3_6.h");
74+
println!("cargo:rerun-if-changed=src/bpf/lua_5_4_0.h");
75+
println!("cargo:rerun-if-changed=src/bpf/lua_5_5_0.h");
4576
}
4677

4778
fn main() {

src/args.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ use clap::Parser;
33
#[derive(Parser)]
44
#[command(name = "lua-perf")]
55
#[command(author = "findstr <findstrx@gmail.com>")]
6-
#[command(version = "0.1")]
6+
#[command(version = "0.2")]
77
#[command(about = "A perf tool for C and Lua hybrid code")]
88
pub struct Args {
99
#[arg(short, long, help = "PID of the process to profile")]
1010
pub pid: libc::pid_t,
11-
#[clap(short, long, default_value_t = 1000, help="Profile sample frequency(HZ)")]
11+
#[clap(short, long, default_value_t = 100, help="Profile sample frequency(HZ)")]
1212
pub freq: u64,
1313
}

src/bpf/lua.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
#ifndef _LUA_H
22
#define _LUA_H
33

4-
#include "lua54.h"
4+
#if defined(LUA_VERSION_5_5_0)
5+
#include "lua_5_5_0.h"
6+
#elif defined(LUA_VERSION_5_4_0)
7+
#include "lua_5_4_0.h"
8+
#elif defined(LUA_VERSION_5_3_6)
9+
#include "lua_5_3_6.h"
10+
#else
11+
#error "Lua version not defined"
12+
#endif
513

614
#endif

0 commit comments

Comments
 (0)