Knowledge3D/benchmark_audio_minimal.py at main · Stream44/Knowledge3D · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python3
"""
Minimal audio codec benchmark bypassing package import issues.
This script directly loads the GPU-only codec components.
"""

import os
import sys
import time
import numpy as np

# Set environment before any CUDA imports.
# These are plain assignments, so any value inherited from the caller's
# environment is overwritten.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["LC_ALL"] = "C.UTF-8"
os.environ["LANG"] = "C.UTF-8"

# Add paths for direct imports.
# The repository root was previously hard-coded to a single checkout location,
# which made the script unusable from any other directory.  It is now resolved
# at run time; the historical location is kept as the last-resort candidate so
# the original setup keeps working unchanged.
LEGACY_REPO_ROOT = "/mnt/arquivos/EchoSystems AI Studios/Knowledge 3D Standard/GitHub/Knowledge3D"
BINDINGS_SUBDIR = os.path.join("knowledge3d", "cranium", "codecs", "ptx_bindings")


def find_repo_root(candidates):
    """Pick the repository root from an ordered list of candidate directories.

    Args:
        candidates: Iterable of directory paths in priority order.  ``None``
            and empty strings are skipped.

    Returns:
        The first candidate that contains ``BINDINGS_SUBDIR`` as a directory.
        If no candidate does, the last non-empty candidate is returned so the
        caller still gets a deterministic path (``None`` when every candidate
        was empty).
    """
    fallback = None
    for root in candidates:
        if not root:
            continue
        fallback = root
        if os.path.isdir(os.path.join(root, BINDINGS_SUBDIR)):
            return root
    return fallback


# Candidate order: explicit override via the KNOWLEDGE3D_ROOT environment
# variable, then the directory holding this script, then the legacy path.
_script_dir = (
    os.path.dirname(os.path.abspath(__file__)) if "__file__" in globals() else None
)
_repo_root = find_repo_root(
    [os.environ.get("KNOWLEDGE3D_ROOT"), _script_dir, LEGACY_REPO_ROOT]
)

# Insertion order matches the original: the bindings directory ends up first
# on sys.path, the repository root second.
sys.path.insert(0, _repo_root)
sys.path.insert(0, os.path.join(_repo_root, BINDINGS_SUBDIR))

# Import PTX bindings directly
from audio_harmonic_binding import AudioHarmonicGPU
from ternary_mdct_binding import TernaryMDCTKernel
from ternary_quant_binding import TernaryQuantizer

# Banner
_banner_rule = "=" * 80
print(_banner_rule)
print("GPU-Sovereign Audio Codec Benchmark (Post-Harmonic-GPU Implementation)")
print(_banner_rule)
print()

# Initialize GPU components.  All three objects are constructed before any
# confirmation line is printed, so a constructor failure aborts the script
# without a misleading success message.
print("Initializing GPU components...")
mdct = TernaryMDCTKernel(n=1024)
harm_gpu = AudioHarmonicGPU()
quant_gpu = TernaryQuantizer()
for _status_line in (
    "  ✓ TernaryMDCTKernel initialized",
    "  ✓ AudioHarmonicGPU initialized",
    "  ✓ TernaryQuantizer initialized",
):
    print(_status_line)
print()

# Test signals: shared by every generate_* function below.
sample_rate, duration = 44100, 1.0
num_samples = int(sample_rate * duration)

def generate_sine(freq=440.0):
    """Return a unit-amplitude sine tone as a float32 array.

    The time axis spans ``[0, duration)`` in ``num_samples`` steps, both taken
    from the module-level test-signal settings.

    Args:
        freq: Tone frequency, in cycles per unit of ``duration``.
    """
    time_axis = np.linspace(0, duration, num_samples, endpoint=False)
    angular_freq = 2 * np.pi * freq
    tone = np.sin(angular_freq * time_axis)
    return tone.astype(np.float32)

def generate_speech():
    """Return a synthetic speech-like test signal as a float32 array.

    Three sinusoids (120, 240 and 360 cycles per unit time, with amplitudes
    1.0, 0.6 and 0.4) are summed, then Gaussian noise scaled by 0.02 is added.
    The noise is drawn from NumPy's global random state, which is not seeded
    here, so the output differs from call to call.
    """
    time_axis = np.linspace(0, duration, num_samples, endpoint=False)
    partials = ((1.0, 120), (0.6, 240), (0.4, 360))  # (amplitude, frequency)

    voiced = 0
    for gain, pitch in partials:
        voiced = voiced + gain * np.sin(2 * np.pi * pitch * time_axis)

    noise = 0.02 * np.random.standard_normal(voiced.shape)
    voiced += noise
    return voiced.astype(np.float32)

def generate_music():
    """Return a synthetic chord test signal as a float32 array.

    The three chord tones are averaged, then a sinusoid at twice the first
    chord frequency is mixed in with a gain of 0.05.
    """
    time_axis = np.linspace(0, duration, num_samples, endpoint=False)
    triad = [261.63, 329.63, 392.0]  # C major triad

    mix = 0
    for note in triad:
        mix = mix + np.sin(2 * np.pi * note * time_axis)
    mix = mix / len(triad)

    # Low-level component at double the frequency of the first chord tone.
    mix += 0.05 * np.sin(2 * np.pi * 2 * triad[0] * time_axis)
    return mix.astype(np.float32)

def compute_psnr(original, reconstructed, peak=1.0):
    """Return the peak signal-to-noise ratio between two signals, in dB.

    Both inputs are converted to float64 before subtracting, so unsigned
    integer inputs cannot wrap around and float32 inputs are not averaged in
    reduced precision.

    Args:
        original: Reference signal (array-like).
        reconstructed: Signal to compare against the reference (array-like,
            broadcast-compatible with ``original``).
        peak: Peak amplitude the ratio is measured against.  The default of
            1.0 reproduces the previous fixed behaviour.

    Returns:
        A built-in ``float``: ``10 * log10(peak**2 / mse)``, or ``inf`` when
        the mean squared error is below ``1e-12``.
    """
    reference = np.asarray(original, dtype=np.float64)
    estimate = np.asarray(reconstructed, dtype=np.float64)
    mse = float(np.mean((reference - estimate) ** 2))
    if mse < 1e-12:
        # Treat numerically identical signals as a perfect reconstruction
        # instead of dividing by (almost) zero.
        return float("inf")
    return float(10 * np.log10(peak ** 2 / mse))

# GPU harmonic analysis + synthesis
def gpu_analyze_synthesize(audio, n_harmonics=20):
    """Pick the strongest MDCT bins of the first frame and resynthesize them.

    Only the first 1024 samples of ``audio`` are analysed; shorter input is
    zero-padded to 1024 samples for the analysis step.

    Args:
        audio: Input signal array.
        n_harmonics: Upper bound on the number of bins to keep; capped at the
            number of MDCT coefficients returned for the frame.

    Returns:
        A tuple ``(harmonics, synth, residual)``:
        ``harmonics`` is a list of ``(freq_hz, amplitude, phase)`` tuples with
        the phase fixed at ``0.0``; ``synth`` is the result of
        ``harm_gpu.synthesize`` for ``audio.size`` samples; ``residual`` is
        the result of ``harm_gpu.subtract_residual(audio, synth)``.
    """
    frame_size = 1024

    # Analysis frame: leading samples of the input, zero-padded when too short.
    if audio.size >= frame_size:
        frame = audio[:frame_size]
    else:
        frame = np.zeros(frame_size, dtype=np.float32)
        frame[:audio.size] = audio

    # MDCT analysis followed by top-K selection on the GPU.
    coeffs = mdct.forward(frame)
    top_k = min(n_harmonics, coeffs.size)
    idx, mag = harm_gpu.harmonic_topk(coeffs, k=top_k)

    # Map each selected bin index to a frequency.
    # NOTE(review): the mapping is index * sample_rate / frame_size; whether
    # that matches the bin spacing of TernaryMDCTKernel is not visible from
    # this file - confirm against the kernel.
    bin_width_hz = sample_rate / float(frame_size)
    harmonics = [
        (float(bin_index) * bin_width_hz, float(magnitude), 0.0)
        for bin_index, magnitude in zip(idx.tolist(), mag.tolist())
    ]

    # Column arrays for the additive synthesis call.
    freq = np.array([partial[0] for partial in harmonics], dtype=np.float32)
    amp = np.array([partial[1] for partial in harmonics], dtype=np.float32)
    phase = np.array([partial[2] for partial in harmonics], dtype=np.float32)

    synth = harm_gpu.synthesize(
        freq,
        amp,
        phase,
        sample_rate=float(sample_rate),
        num_samples=audio.size,
    )

    # Whatever the harmonic model does not capture.
    residual = harm_gpu.subtract_residual(audio, synth)

    return harmonics, synth, residual

# Benchmark function
def benchmark_audio(name, audio):
    """Time one encode/decode round trip for ``audio`` and print the results.

    The encode phase runs ``gpu_analyze_synthesize``, applies a Hann window to
    the first 1024 samples of the residual, transforms that frame with
    ``mdct.forward`` and quantizes the coefficients.  The decode phase
    dequantizes, applies ``mdct.inverse`` and the same window, resynthesizes
    the harmonics and adds the decoded frame onto the first 1024 samples.

    Args:
        name: Label printed with the results and stored under ``"name"``.
        audio: Input signal array.

    Returns:
        A dict with the keys ``"name"``, ``"encode_ms"``, ``"decode_ms"``,
        ``"ratio"``, ``"psnr_db"`` and ``"n_harmonics"``.

    NOTE(review): the residual is sliced to 1024 samples and multiplied by a
    1024-sample window, so this appears to assume ``audio`` has at least 1024
    samples - TODO confirm behaviour for shorter input.
    """
    print(f"Testing: {name}")
    print("-" * 60)

    # Encode (harmonic analysis + MDCT residual)
    # Everything from here to encode_time_ms is inside the encode timer,
    # including construction of the Hann window below.
    start = time.perf_counter()
    # NOTE(review): `synth` is not used again in this function; the decode
    # phase resynthesizes from `harmonics` instead.
    harmonics, synth, residual = gpu_analyze_synthesize(audio, n_harmonics=20)

    # MDCT on residual (simplified - just first frame for speed)
    frame_size = 1024
    window = np.hanning(frame_size).astype(np.float32)
    frame = residual[:frame_size] * window
    mdct_coeffs = mdct.forward(frame)

    # Ternary quantization
    quantized = quant_gpu.quantize(mdct_coeffs, threshold=0.1)

    encode_time_ms = (time.perf_counter() - start) * 1000

    # Decode (synthesis + IMDCT residual)
    start = time.perf_counter()

    # Dequantize and IMDCT
    # The dequantization scale (0.1) equals the quantization threshold above.
    dequantized = quant_gpu.dequantize(quantized, scale=0.1)
    imdct_frame = mdct.inverse(dequantized) * window

    # Reconstruct with GPU synthesis
    freq = np.array([h[0] for h in harmonics], dtype=np.float32)
    amp = np.array([h[1] for h in harmonics], dtype=np.float32)
    phase = np.array([h[2] for h in harmonics], dtype=np.float32)
    reconstructed = harm_gpu.synthesize(freq, amp, phase, sample_rate=float(sample_rate), num_samples=audio.size)

    # Add residual (simplified - just first frame)
    # Samples beyond the first frame carry the harmonic model only.
    reconstructed[:frame_size] += imdct_frame

    decode_time_ms = (time.perf_counter() - start) * 1000

    # Quality metrics
    # PSNR is measured over the whole signal, not just the first frame.
    psnr = compute_psnr(audio, reconstructed[:audio.size])

    # Compression ratio (estimate)
    # NOTE(review): the compressed size counts the harmonics plus ONE
    # quantized residual frame, while the original size counts every sample
    # of `audio`; the ratio is therefore not a whole-signal figure.
    harmonics_size = len(harmonics) * 3 * 4  # 3 floats per harmonic
    ternary_size = quantized.size * 1.585 / 8  # 1.585 bits per trit
    compressed_size = harmonics_size + ternary_size
    original_size = audio.size * 4  # float32
    ratio = original_size / compressed_size if compressed_size > 0 else float("inf")

    print(f"  Encode time: {encode_time_ms:.2f} ms")
    print(f"  Decode time: {decode_time_ms:.2f} ms")
    print(f"  Compression ratio: {ratio:.1f}×")
    print(f"  PSNR: {psnr:.1f} dB")
    print(f"  Harmonics extracted: {len(harmonics)}")
    print()

    return {
        "name": name,
        "encode_ms": encode_time_ms,
        "decode_ms": decode_time_ms,
        "ratio": ratio,
        "psnr_db": psnr,
        "n_harmonics": len(harmonics),
    }

# Run benchmarks
# Each case pairs a label with a zero-argument signal factory; the signal is
# generated immediately before its benchmark, in the order listed.
_cases = (
    ("sine_440hz", lambda: generate_sine(440)),
    ("speech_synth", generate_speech),
    ("music_piano", generate_music),
)

results = []
print("Running benchmarks...")
print()

for _label, _make_signal in _cases:
    results.append(benchmark_audio(_label, _make_signal()))

# Summary table
_summary_rule = "=" * 80
print(_summary_rule)
print("SUMMARY")
print(_summary_rule)
print()
print(f"{'Audio Type':<15} | {'Encode (ms)':<12} | {'Decode (ms)':<12} | {'Ratio':<8} | {'PSNR (dB)':<10}")
print("-" * 80)
for r in results:
    print(f"{r['name']:<15} | {r['encode_ms']:>11.2f} | {r['decode_ms']:>11.2f} | {r['ratio']:>7.1f}× | {r['psnr_db']:>9.1f}")
print()

# Closing caveats about what this simplified benchmark measures.
for _footnote in (
    "Note: This is a simplified benchmark focusing on GPU harmonic path performance.",
    "Full codec with multi-frame MDCT would have slightly higher latency.",
):
    print(_footnote)
print()