lambdaclass
diff --git a/crates/verilm-core/src/attention.rs b/crates/verilm-core/src/attention.rs
Lines changed: 39 additions & 23 deletions
diff --git a/crates/verilm-core/src/bounds.rs b/crates/verilm-core/src/bounds.rs
Lines changed: 31 additions & 13 deletions
diff --git a/crates/verilm-core/src/constants.rs b/crates/verilm-core/src/constants.rs
Lines changed: 8 additions & 8 deletions
diff --git a/crates/verilm-core/src/field.rs b/crates/verilm-core/src/field.rs
Lines changed: 10 additions & 2 deletions
diff --git a/crates/verilm-core/src/freivalds.rs b/crates/verilm-core/src/freivalds.rs
Lines changed: 8 additions & 14 deletions
@@ -86,9 +86,7 @@ pub fn replay_attention_reference(
         let kv_head = qh / heads_per_kv;
 
         // Extract Q head
-        let q_head: Vec<f64> = (0..d_head)
-            .map(|i| q_i8[qh * d_head + i] as f64)
-            .collect();
+        let q_head: Vec<f64> = (0..d_head).map(|i| q_i8[qh * d_head + i] as f64).collect();
 
         // Compute attention scores: score[t] = q · k_t / sqrt(d)
         let scores: Vec<f64> = (0..seq_len)
@@ -153,16 +151,18 @@ pub fn replay_attention_roped(
     let heads_per_kv = cfg.n_q_heads / cfg.n_kv_heads;
     let inv_sqrt_d = 1.0 / (d_head as f64).sqrt();
     let seq_len = kv_cache_k_roped.len();
-    let inv_scale = if scale_a.abs() > 1e-30 { 1.0 / scale_a } else { 1.0 };
+    let inv_scale = if scale_a.abs() > 1e-30 {
+        1.0 / scale_a
+    } else {
+        1.0
+    };
 
     let mut a = vec![0i8; cfg.hidden_dim];
 
     for qh in 0..cfg.n_q_heads {
         let kv_head = qh / heads_per_kv;
 
-        let q_head: Vec<f64> = (0..d_head)
-            .map(|i| q_roped[qh * d_head + i])
-            .collect();
+        let q_head: Vec<f64> = (0..d_head).map(|i| q_roped[qh * d_head + i]).collect();
 
         // Attention scores: q · k_t / sqrt(d)
         let scores: Vec<f64> = (0..seq_len)
@@ -192,9 +192,7 @@ pub fn replay_attention_roped(
 
         // Requantize: a_i8 = round(a_f64 / scale_a)
         for i in 0..d_head {
-            a[qh * d_head + i] = (head_out[i] * inv_scale)
-                .round()
-                .clamp(-128.0, 127.0) as i8;
+            a[qh * d_head + i] = (head_out[i] * inv_scale).round().clamp(-128.0, 127.0) as i8;
         }
     }
 
@@ -217,17 +215,19 @@ pub fn replay_attention_roped_raw(
     let heads_per_kv = cfg.n_q_heads / cfg.n_kv_heads;
     let inv_sqrt_d = 1.0 / (d_head as f64).sqrt();
     let seq_len = kv_cache_k_roped.len();
-    let inv_scale = if scale_a.abs() > 1e-30 { 1.0 / scale_a } else { 1.0 };
+    let inv_scale = if scale_a.abs() > 1e-30 {
+        1.0 / scale_a
+    } else {
+        1.0
+    };
 
     let mut a_i8 = vec![0i8; cfg.hidden_dim];
     let mut a_f64 = vec![0.0f64; cfg.hidden_dim];
 
     for qh in 0..cfg.n_q_heads {
         let kv_head = qh / heads_per_kv;
 
-        let q_head: Vec<f64> = (0..d_head)
-            .map(|i| q_roped[qh * d_head + i])
-            .collect();
+        let q_head: Vec<f64> = (0..d_head).map(|i| q_roped[qh * d_head + i]).collect();
 
         let scores: Vec<f64> = (0..seq_len)
             .map(|t| {
@@ -255,9 +255,7 @@ pub fn replay_attention_roped_raw(
         for i in 0..d_head {
             let idx = qh * d_head + i;
             a_f64[idx] = head_out[i];
-            a_i8[idx] = (head_out[i] * inv_scale)
-                .round()
-                .clamp(-128.0, 127.0) as i8;
+            a_i8[idx] = (head_out[i] * inv_scale).round().clamp(-128.0, 127.0) as i8;
         }
     }
 
@@ -337,8 +335,16 @@ pub fn measure_attention_diff(
     };
 
     let n_f = n as f64;
-    let frac_eq = if n > 0 { histogram[0] as f64 / n_f } else { 0.0 };
-    let frac_le_1 = if n > 0 { (histogram[0] + histogram[1]) as f64 / n_f } else { 0.0 };
+    let frac_eq = if n > 0 {
+        histogram[0] as f64 / n_f
+    } else {
+        0.0
+    };
+    let frac_le_1 = if n > 0 {
+        (histogram[0] + histogram[1]) as f64 / n_f
+    } else {
+        0.0
+    };
     let frac_le_2 = if n > 0 {
         (histogram[0] + histogram[1] + histogram[2]) as f64 / n_f
     } else {
@@ -365,7 +371,11 @@ pub fn measure_attention_diff(
 /// Returns `None` if the vectors have equal length and the L-infinity difference
 /// is within `tolerance.max_abs_diff`. Returns `Some(i16::MAX)` if lengths differ
 /// (malformed input), or `Some(max_diff)` if the tolerance is exceeded.
-pub fn compare_attention_output(claimed: &[i8], replayed: &[i8], tolerance: &AttentionToleranceConfig) -> Option<i16> {
+pub fn compare_attention_output(
+    claimed: &[i8],
+    replayed: &[i8],
+    tolerance: &AttentionToleranceConfig,
+) -> Option<i16> {
     if claimed.len() != replayed.len() {
         return Some(i16::MAX);
     }
@@ -480,11 +490,17 @@ mod tests {
         let claimed = vec![1i8, 2, 3];
         let replayed = vec![1i8, 2, 3, 4];
         let tol = AttentionToleranceConfig { max_abs_diff: 0 };
-        assert_eq!(compare_attention_output(&claimed, &replayed, &tol), Some(i16::MAX));
+        assert_eq!(
+            compare_attention_output(&claimed, &replayed, &tol),
+            Some(i16::MAX)
+        );
 
         // Extended claimed vector is also rejected
         let claimed2 = vec![1i8, 2, 3, 4, 5];
-        assert_eq!(compare_attention_output(&claimed2, &replayed, &tol), Some(i16::MAX));
+        assert_eq!(
+            compare_attention_output(&claimed2, &replayed, &tol),
+            Some(i16::MAX)
+        );
     }
 
     #[test]
@@ -508,7 +524,7 @@ mod tests {
     #[test]
     fn test_measure_diff_known_values() {
         // diffs: 0, 1, 2, 3, 5, 7
-        let claimed  = vec![10i8, 20, 30, 40, 50, 60];
+        let claimed = vec![10i8, 20, 30, 40, 50, 60];
         let replayed = vec![10i8, 19, 28, 37, 45, 53];
         let stats = measure_attention_diff(&claimed, &replayed, 3, 10).unwrap();
         assert_eq!(stats.linf, 7);
 
@@ -227,7 +227,7 @@ pub fn compute_corridor_bound(
             // K,V committed (exact). Only Q has dequant+RoPE error.
             let eps_qk = params.c_rope * u;
             let eps_v = 0.0; // V is committed
-            // Score error: |Δs_j| ≤ ε_qk·S_max (only Q perturbed)
+                             // Score error: |Δs_j| ≤ ε_qk·S_max (only Q perturbed)
             let softmax = params.bv_over_scale_a * 2.0 * eps_qk * params.s_max;
             let v_deq = 0.0;
             let fp32 = params.bv_over_scale_a * params.seq_len as f64 * u_f32;
@@ -350,12 +350,18 @@ mod tests {
         assert!((b.eps_qk - 5.0 * 2.0_f64.powi(-11)).abs() < 1e-10);
 
         // softmax_term = 127 · 4 · 0.00244 · 20 ≈ 24.8
-        assert!(b.softmax_term > 24.0 && b.softmax_term < 26.0,
-            "softmax_term={}", b.softmax_term);
+        assert!(
+            b.softmax_term > 24.0 && b.softmax_term < 26.0,
+            "softmax_term={}",
+            b.softmax_term
+        );
 
         // Total ≈ 24.8 + 0.06 + negligible ≈ 24.9
-        assert!(b.delta_o_over_scale_a > 24.0,
-            "expected ~25, got {}", b.delta_o_over_scale_a);
+        assert!(
+            b.delta_o_over_scale_a > 24.0,
+            "expected ~25, got {}",
+            b.delta_o_over_scale_a
+        );
         assert!(!b.achieves_leq_1);
         assert!(b.max_abs_diff_i8 >= 25);
     }
@@ -374,8 +380,11 @@ mod tests {
         let b = compute_corridor_bound(&params, CommittedIntermediates::CommittedKV);
 
         // softmax_term = 127 · 2 · 0.00244 · 20 ≈ 12.4
-        assert!(b.softmax_term > 12.0 && b.softmax_term < 13.0,
-            "softmax_term={}", b.softmax_term);
+        assert!(
+            b.softmax_term > 12.0 && b.softmax_term < 13.0,
+            "softmax_term={}",
+            b.softmax_term
+        );
         assert!(!b.achieves_leq_1);
     }
 
@@ -393,8 +402,11 @@ mod tests {
         let b = compute_corridor_bound(&params, CommittedIntermediates::CommittedKV);
 
         // 127 · 2 · 0.00244 · 8 ≈ 4.96
-        assert!(b.softmax_term > 4.5 && b.softmax_term < 5.5,
-            "softmax_term={}", b.softmax_term);
+        assert!(
+            b.softmax_term > 4.5 && b.softmax_term < 5.5,
+            "softmax_term={}",
+            b.softmax_term
+        );
         // Still > 1
         assert!(!b.achieves_leq_1);
     }
@@ -413,8 +425,11 @@ mod tests {
         let b = compute_corridor_bound(&params, CommittedIntermediates::CommittedQKV);
 
         // 127 · 3 · 4096 · 2^-24 ≈ 0.093
-        assert!(b.delta_o_over_scale_a < 0.1,
-            "expected < 0.1, got {}", b.delta_o_over_scale_a);
+        assert!(
+            b.delta_o_over_scale_a < 0.1,
+            "expected < 0.1, got {}",
+            b.delta_o_over_scale_a
+        );
         assert!(b.achieves_leq_1);
     }
 
@@ -432,8 +447,11 @@ mod tests {
         let b = compute_corridor_bound(&params, CommittedIntermediates::CommittedScores);
 
         // v_dequant = 127 · 2^-11 ≈ 0.062
-        assert!(b.v_dequant_term > 0.06 && b.v_dequant_term < 0.07,
-            "v_dequant_term={}", b.v_dequant_term);
+        assert!(
+            b.v_dequant_term > 0.06 && b.v_dequant_term < 0.07,
+            "v_dequant_term={}",
+            b.v_dequant_term
+        );
         assert!(b.delta_o_over_scale_a < 0.1);
         assert!(b.achieves_leq_1);
     }
 
@@ -139,9 +139,9 @@ pub struct RopeScaling {
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModelConfig {
     pub name: String,
-    pub hidden_dim: usize,    // n
-    pub kv_dim: usize,        // n_kv = n_kv_heads * d_head
-    pub ffn_dim: usize,       // n_ffn
+    pub hidden_dim: usize, // n
+    pub kv_dim: usize,     // n_kv = n_kv_heads * d_head
+    pub ffn_dim: usize,    // n_ffn
     pub d_head: usize,
     pub n_layers: usize,
     pub n_q_heads: usize,
@@ -161,7 +161,7 @@ impl ModelConfig {
         ModelConfig {
             name: "Llama-3-70B".into(),
             hidden_dim: 8192,
-            kv_dim: 1024,     // 8 KV heads * 128
+            kv_dim: 1024, // 8 KV heads * 128
             ffn_dim: 28672,
             d_head: 128,
             n_layers: 80,
@@ -177,7 +177,7 @@ impl ModelConfig {
         ModelConfig {
             name: "Llama-3-8B".into(),
             hidden_dim: 4096,
-            kv_dim: 1024,     // 8 KV heads * 128
+            kv_dim: 1024, // 8 KV heads * 128
             ffn_dim: 14336,
             d_head: 128,
             n_layers: 32,
@@ -194,7 +194,7 @@ impl ModelConfig {
         ModelConfig {
             name: "Llama-3.1-8B".into(),
             hidden_dim: 4096,
-            kv_dim: 1024,     // 8 KV heads * 128
+            kv_dim: 1024, // 8 KV heads * 128
             ffn_dim: 14336,
             d_head: 128,
             n_layers: 32,
@@ -216,7 +216,7 @@ impl ModelConfig {
         ModelConfig {
             name: "Llama-3-405B".into(),
             hidden_dim: 16384,
-            kv_dim: 1024,     // 8 KV heads * 128
+            kv_dim: 1024, // 8 KV heads * 128
             ffn_dim: 53248,
             d_head: 128,
             n_layers: 126,
@@ -233,7 +233,7 @@ impl ModelConfig {
         ModelConfig {
             name: "toy".into(),
             hidden_dim: 16,
-            kv_dim: 4,        // 2 KV heads * 2
+            kv_dim: 4, // 2 KV heads * 2
             ffn_dim: 32,
             d_head: 2,
             n_layers: 2,
 
@@ -140,7 +140,11 @@ impl Fp64 {
         let lo2 = sum & P64;
         let hi2 = sum >> 61;
         let r = lo2 + hi2;
-        if r >= P64 { r - P64 } else { r }
+        if r >= P64 {
+            r - P64
+        } else {
+            r
+        }
     }
 
     pub fn new(val: u64) -> Self {
@@ -379,7 +383,11 @@ impl Fp128 {
         let lo = val & P128;
         let hi = val >> 127;
         let r = lo + hi;
-        if r >= P128 { r - P128 } else { r }
+        if r >= P128 {
+            r - P128
+        } else {
+            r
+        }
     }
 
     pub fn new(val: u128) -> Self {
 
@@ -13,7 +13,7 @@
 //! any input x without actually computing Wx. Since v = r^T W and the
 //! prover knows W (open weights), leaking v also leaks r.
 
-use crate::field::{Fp, Fp64, Fp128, P64};
+use crate::field::{Fp, Fp128, Fp64, P64};
 
 /// Precompute v = r^T W in F_p.
 ///
@@ -112,7 +112,7 @@ pub fn derive_block_coefficients(
     matrix_idx: usize,
     n_blocks: usize,
 ) -> Vec<Fp> {
-    use sha2::{Sha256, Digest};
+    use sha2::{Digest, Sha256};
 
     let mut coeffs = Vec::with_capacity(n_blocks);
     for b in 0..n_blocks {
@@ -145,13 +145,7 @@ pub fn derive_block_coefficients(
 /// - `c`: random batching coefficients (length = n_blocks)
 ///
 /// Returns true if the check passes.
-pub fn check_q8_blocks(
-    v: &[Fp],
-    x: &[i8],
-    r: &[Fp],
-    sumi: &[Vec<i32>],
-    c: &[Fp],
-) -> bool {
+pub fn check_q8_blocks(v: &[Fp], x: &[i8], r: &[Fp], sumi: &[Vec<i32>], c: &[Fp]) -> bool {
     let n_blocks = sumi.len();
     assert_eq!(c.len(), n_blocks);
     assert_eq!(v.len(), n_blocks * Q8_0_BLOCK_SIZE);
@@ -433,10 +427,8 @@ mod tests {
         ];
         let d_w = vec![
             // row 0: scale_b0, scale_b1
-            1.0, 2.0,
-            // row 1
-            0.5, 1.5,
-            // row 2
+            1.0, 2.0, // next: row 1
+            0.5, 1.5, // next: row 2
             1.0, 1.0,
         ];
         let d_x = vec![1.0, 0.5];
@@ -479,7 +471,9 @@ mod tests {
         // Flat check
         let z: Vec<i32> = (0..rows)
             .map(|row| {
-                (0..cols).map(|col| w[row * cols + col] as i32 * x[col] as i32).sum()
+                (0..cols)
+                    .map(|col| w[row * cols + col] as i32 * x[col] as i32)
+                    .sum()
             })
             .collect();
         assert!(check(&v, &x, &r, &z));