feat: add CMake config package and Google Benchmark suite

peng.li24 · claude · peng.li24 · commit 443d345c100d · 2026-05-17T10:23:29.000Z
- find_package(numpycpp) support via installed cmake config
- INTERFACE target for header-only consumers
- bench/ with google-benchmark microbenchmarks (sqrt, exp, log, sin, cos,
  sum, mean, max, dot, norm)
- Python numpy reference benchmark for comparison
- Fix header install directory structure

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -6,12 +6,64 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
+# ---- INTERFACE target for find_package consumers ----------------------------
+# numpycpp is header-only, so the target carries usage requirements only: the
+# include path and the C++17 requirement propagate to whoever links it.
+add_library(numpycpp INTERFACE)
+# Namespaced alias: in-tree consumers (add_subdirectory / FetchContent) can link
+# numpycpp::numpycpp, the same name the installed export set provides.
+add_library(numpycpp::numpycpp ALIAS numpycpp)
+target_include_directories(numpycpp INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+    $<INSTALL_INTERFACE:include/numpycpp>
+)
+target_compile_features(numpycpp INTERFACE cxx_std_17)
+
 # ---- Install — header-only C++ library --------------------------------------
-install(DIRECTORY numpy/
+# No trailing slash: the numpy directory itself is installed, producing
+# include/numpycpp/numpy/*.h to match the INSTALL_INTERFACE include path.
+install(DIRECTORY numpy
     DESTINATION include/numpycpp
     FILES_MATCHING PATTERN "*.h"
 )
 
+# Register the target in the export set written by install(EXPORT) below.
+install(TARGETS numpycpp
+    EXPORT numpycpp-targets
+    DESTINATION lib/cmake/numpycpp
+)
+
+include(CMakePackageConfigHelpers)
+configure_package_config_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/numpycpp-config.cmake.in
+    ${CMAKE_CURRENT_BINARY_DIR}/numpycpp-config.cmake
+    INSTALL_DESTINATION lib/cmake/numpycpp
+)
+
+# Version file: without it find_package(numpycpp <version>) cannot be satisfied.
+# ARCH_INDEPENDENT (CMake >= 3.14) skips the pointer-size check, which is
+# meaningless for a header-only package.
+set(numpycpp_version_file_args COMPATIBILITY SameMajorVersion)
+if(NOT CMAKE_VERSION VERSION_LESS 3.14)
+    list(APPEND numpycpp_version_file_args ARCH_INDEPENDENT)
+endif()
+write_basic_package_version_file(
+    ${CMAKE_CURRENT_BINARY_DIR}/numpycpp-config-version.cmake
+    VERSION ${PROJECT_VERSION}
+    ${numpycpp_version_file_args}
+)
+
+install(EXPORT numpycpp-targets
+    NAMESPACE numpycpp::
+    DESTINATION lib/cmake/numpycpp
+)
+
+install(FILES
+    ${CMAKE_CURRENT_BINARY_DIR}/numpycpp-config.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/numpycpp-config-version.cmake
+    DESTINATION lib/cmake/numpycpp
+)
+
 # ---- CPack DEB packaging ----------------------------------------------------
 set(CPACK_PACKAGE_NAME                     "numpycpp-dev")
 set(CPACK_PACKAGE_VERSION                  ${PROJECT_VERSION})
diff --git a/README.md b/README.md
@@ -38,14 +38,28 @@ double s = numpy::sum(data.data(), data.size());
 
 ### Install
 
+**Ubuntu (DEB)**
+
+Download the [latest `.deb` release](https://github.com/array2d/numpycpp/releases) or build from source:
+
 ```bash
 mkdir build && cd build
 cmake .. -DCMAKE_BUILD_TYPE=Release
 make deb
-sudo dpkg -i numpcpp-dev-*.deb
+sudo dpkg -i numpycpp-dev-*.deb
-# headers installed to /usr/include/numpycpp/
 ```
 
+Headers are installed to `/usr/include/numpycpp/` along with CMake config. Consuming projects use:
+
+```cmake
+find_package(numpycpp REQUIRED)
+target_link_libraries(myapp PRIVATE numpycpp::numpycpp)
+```
+
+**Manual (header-only)**
+
+Add `-Ipath/to/numpycpp` to your compiler flags and include the headers directly. No build step, no copy required.
+
 ### Testing
 
 The test suite verifies pixel-level precision alignment between every C++ function and Python numpy.
diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt
@@ -0,0 +1,28 @@
+cmake_minimum_required(VERSION 3.16)
+project(numpycpp-bench LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Benchmark numbers from an unoptimized build are not meaningful, so default
+# single-config generators to Release when the user has not picked a build type.
+get_property(bench_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+if(NOT bench_is_multi_config AND NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
+endif()
+
+# FetchContent google-benchmark
+include(FetchContent)
+FetchContent_Declare(googlebench
+    GIT_REPOSITORY https://github.com/google/benchmark.git
+    GIT_TAG v1.8.3
+)
+# Turn off google-benchmark's own test suite before it is configured.
+set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
+FetchContent_MakeAvailable(googlebench)
+
+add_executable(bench_core bench_core.cpp)
+# bench_core.cpp includes "numpy/...", which resolves from the directory above bench/.
+target_include_directories(bench_core PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
+target_link_libraries(bench_core PRIVATE benchmark::benchmark)
diff --git a/bench/bench_core.cpp b/bench/bench_core.cpp
@@ -0,0 +1,104 @@
+#include "numpy/core.h"
+#include "numpy/linalg.h"
+#include <benchmark/benchmark.h>
+#include <vector>
+#include <random>
+#include <cmath>
+
+// ---- helpers -----------------------------------------------------------------
+// make_data(n): n doubles from uniform_real_distribution(1.0, 100.0); the engine is re-seeded with 42 on every call.
+std::vector<double> make_data(size_t n) {
+    std::mt19937 engine(42);
+    std::uniform_real_distribution<double> uniform(1.0, 100.0);
+    std::vector<double> values(n);
+    for (double& slot : values) slot = uniform(engine);
+    return values;
+}
+
+// ---- element-wise -------------------------------------------------------------
+// BENCH_ELEMWISE(NAME) defines and registers BM_<NAME>, which times numpy::NAME(in, out, count) on make_data input.
+#define BENCH_ELEMWISE(NAME)                                       \
+static void BM_##NAME(benchmark::State& state) {                   \
+    size_t count = state.range(0);                                 \
+    auto input = make_data(count);                                 \
+    std::vector<double> output(count);                             \
+    for (auto _ : state) {                                         \
+        numpy::NAME(input.data(), output.data(), count);           \
+        benchmark::DoNotOptimize(output.data());                   \
+    }                                                              \
+    state.SetItemsProcessed(state.iterations() * count);           \
+}                                                                  \
+BENCHMARK(BM_##NAME)->Range(1 << 10, 1 << 22);
+
+BENCH_ELEMWISE(sqrt)
+BENCH_ELEMWISE(abs)
+BENCH_ELEMWISE(exp)
+BENCH_ELEMWISE(log)
+BENCH_ELEMWISE(sin)
+BENCH_ELEMWISE(cos)
+
+// ---- reduction ---------------------------------------------------------------
+
+// Benchmark numpy::sum over state.range(0) doubles produced by make_data.
+static void BM_sum(benchmark::State& state) {
+    auto values = make_data(state.range(0));
+    for (auto _ : state) {
+        double total = numpy::sum(values.data(), values.size());
+        benchmark::DoNotOptimize(total);
+    }
+    state.SetItemsProcessed(state.iterations() * values.size());
+}
+BENCHMARK(BM_sum)->Range(1 << 10, 1 << 22);
+
+// Benchmark numpy::mean over state.range(0) doubles produced by make_data.
+static void BM_mean(benchmark::State& state) {
+    auto values = make_data(state.range(0));
+    for (auto _ : state) {
+        double average = numpy::mean(values.data(), values.size());
+        benchmark::DoNotOptimize(average);
+    }
+    state.SetItemsProcessed(state.iterations() * values.size());
+}
+BENCHMARK(BM_mean)->Range(1 << 10, 1 << 22);
+
+// Benchmark numpy::max over state.range(0) doubles produced by make_data.
+static void BM_max(benchmark::State& state) {
+    auto values = make_data(state.range(0));
+    for (auto _ : state) {
+        double largest = numpy::max(values.data(), values.size());
+        benchmark::DoNotOptimize(largest);
+    }
+    state.SetItemsProcessed(state.iterations() * values.size());
+}
+BENCHMARK(BM_max)->Range(1 << 10, 1 << 22);
+
+// ---- dot product (1D) ---------------------------------------------------------
+
+// Benchmark numpy::dot on two make_data vectors (same seed, so both hold the same values).
+static void BM_dot(benchmark::State& state) {
+    auto lhs = make_data(state.range(0));
+    auto rhs = make_data(state.range(0));
+    for (auto _ : state) {
+        double product = numpy::dot(lhs.data(), rhs.data(), lhs.size());
+        benchmark::DoNotOptimize(product);
+    }
+    state.SetItemsProcessed(state.iterations() * lhs.size());
+}
+BENCHMARK(BM_dot)->Range(1 << 10, 1 << 22);
+
+// ---- linalg norm --------------------------------------------------------------
+
+// Benchmark numpy::linalg::norm over state.range(0) doubles produced by make_data.
+static void BM_norm(benchmark::State& state) {
+    auto values = make_data(state.range(0));
+    for (auto _ : state) {
+        double magnitude = numpy::linalg::norm(values.data(), values.size());
+        benchmark::DoNotOptimize(magnitude);
+    }
+    state.SetItemsProcessed(state.iterations() * values.size());
+}
+BENCHMARK(BM_norm)->Range(1 << 10, 1 << 22);
+
+// ---- main: BENCHMARK_MAIN() supplies the program entry point -------------------
+
+BENCHMARK_MAIN();
diff --git a/bench/bench_numpy.py b/bench/bench_numpy.py
@@ -0,0 +1,63 @@
+"""Reference numpy benchmarks for comparison with numpycpp bench_core.
+
+Usage: python bench_numpy.py
+"""
+import time
+import numpy as np
+
+SIZES = [1 << 10, 1 << 12, 1 << 15, 1 << 18, 1 << 21, 1 << 22]  # the sizes bench_core's Range(1 << 10, 1 << 22) produces (default multiplier 8)
+REPEAT = 10  # timed calls averaged per measurement
+
+rng = np.random.default_rng(42)  # fixed seed, shared by every make_data call
+
+
+def make_data(n):  # n samples from the module-level seeded rng, uniform on [1.0, 100.0)
+    return rng.uniform(low=1.0, high=100.0, size=n)
+
+
+def bench(name, fn, n, src, *args):
+    """Time REPEAT back-to-back calls of fn(src, *args); print the mean per-call ms and Melem/s for n elements."""
+    started = time.perf_counter()
+    for _ in range(REPEAT):
+        fn(src, *args)
+    elapsed_ms = (time.perf_counter() - started) / REPEAT * 1000
+    print(f"  {name:12s}  n={n:8d}  {elapsed_ms:10.4f} ms  ({n / elapsed_ms * 1000 / 1e6:.2f} Melem/s)")
+
+
+elementwise = [  # (label, numpy function) pairs applied to one input array
+    ("sqrt", np.sqrt),
+    ("abs",  np.abs),
+    ("exp",  np.exp),
+    ("log",  np.log),
+    ("sin",  np.sin),
+    ("cos",  np.cos),
+]
+
+reductions = [  # (label, numpy function) pairs that reduce one input array
+    ("sum",  np.sum),
+    ("mean", np.mean),
+    ("max",  np.max),
+]
+
+# Element-wise and reduction tables share one driver: a fresh input per size,
+# reused by every function in the table.
+for heading, table in (("=== element-wise ===", elementwise),
+                       ("\n=== reduction ===", reductions)):
+    print(heading)
+    for size in SIZES:
+        data = make_data(size)
+        for label, func in table:
+            bench(label, func, size, data)
+
+# dot takes two operands, drawn back to back from the shared rng.
+print("\n=== dot (1D) ===")
+for size in SIZES:
+    lhs = make_data(size)
+    rhs = make_data(size)
+    bench("dot", np.dot, size, lhs, rhs)
+
+# Norm of a single vector (np.linalg.norm with its default order).
+print("\n=== norm (L2) ===")
+for size in SIZES:
+    vec = make_data(size)
+    bench("norm", np.linalg.norm, size, vec)
diff --git a/numpycpp-config.cmake.in b/numpycpp-config.cmake.in
@@ -0,0 +1,3 @@
+@PACKAGE_INIT@
+include("${CMAKE_CURRENT_LIST_DIR}/numpycpp-targets.cmake")  # targets file written by install(EXPORT numpycpp-targets)
+check_required_components(numpycpp)  # helper macro provided by the @PACKAGE_INIT@ expansion

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+@PACKAGE_INIT@`
	`2`	`+include("${CMAKE_CURRENT_LIST_DIR}/numpycpp-targets.cmake")`
	`3`	`+check_required_components(numpycpp)`