fix: submissions

RivinHD · RivinHD · commit 07c5ae09f825 · 2025-07-28T11:57:15.000Z
diff --git a/submissions/neon/neon_6.bench.cpp b/submissions/neon/neon_6.bench.cpp
@@ -29,18 +29,18 @@ template <uint32_t TMdim, uint32_t TNdim, uint32_t TKdim, uint32_t TBatchDim> cl
   }
 };
 
-BENCHMARK_TEMPLATE_DEFINE_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64, 64, 48, 64, 1)(benchmark::State &state)
+BENCHMARK_TEMPLATE_DEFINE_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64_1, 64, 48, 64, 1)(benchmark::State &state)
 {
   for (auto _ : state)
   {
-    matmul_64_48_64(matrix_a, matrix_b, matrix_c, lda, ldb, ldc);
+    matmul_64_48_64_1(matrix_a, matrix_b, matrix_c, lda, ldb, ldc);
   }
 
   flops = (64 * 48 * 64) * 2;  // M * N * K * 2 instructions (add & mul)
   flops *= state.iterations();
 };
 
-BENCHMARK_REGISTER_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64)->MinWarmUpTime(1.0);  // WarmUp in seconds
+BENCHMARK_REGISTER_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64_1)->MinWarmUpTime(1.0);  // WarmUp in seconds
 
 BENCHMARK_TEMPLATE_DEFINE_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64_16, 64, 48, 64, 16)(benchmark::State &state)
 {
diff --git a/submissions/neon/neon_6.h b/submissions/neon/neon_6.h
@@ -17,7 +17,7 @@ extern "C"
    * @param ldb leading dimension of B.
    * @param ldc leading dimension of C.
    **/
-  void matmul_64_48_64(float const *a, float const *b, float *c, int64_t lda, int64_t ldb, int64_t ldc);
+  void matmul_64_48_64_1(float const *a, float const *b, float *c, int64_t lda, int64_t ldb, int64_t ldc);
 
   /**
    * @brief Batch-reduce GEMM that computes: C+=sum(Ai*Bi) over a batch.
diff --git a/submissions/neon/neon_6.test.cpp b/submissions/neon/neon_6.test.cpp
@@ -29,7 +29,7 @@ TEST_CASE("Test 64x48x64 batch=1 gemm correctness random data", "[neon_6][correc
   copy_matrix(matrix_c, matrix_c_verify);
 
   // Run matmuls
-  matmul_64_48_64(matrix_a, matrix_b, matrix_c, 64, 64, 64);
+  matmul_64_48_64_1(matrix_a, matrix_b, matrix_c, 64, 64, 64);
   naive_matmul_M_N_K_Batch<M, N, K, 1>(matrix_a, matrix_b, matrix_c_verify, 64, 64, 64, 0, 0);
 
   verify_matmul(matrix_c_verify, matrix_c);
@@ -52,7 +52,7 @@ TEST_CASE("Test 64x48x64 batch=1 gemm correctness counting data", "[neon_6][corr
   copy_matrix(matrix_c, matrix_c_verify);
 
   // Run matmuls
-  matmul_64_48_64(matrix_a, matrix_b, matrix_c, 64, 64, 64);
+  matmul_64_48_64_1(matrix_a, matrix_b, matrix_c, 64, 64, 64);
   naive_matmul_M_N_K_Batch<M, N, K, 1>(matrix_a, matrix_b, matrix_c_verify, 64, 64, 64, 0, 0);
 
   verify_matmul(matrix_c_verify, matrix_c);
diff --git a/submissions/neon/neon_6_1_no_batch.s b/submissions/neon/neon_6_1_no_batch.s
@@ -6,9 +6,9 @@
     * @param x4 = ldb leading dimension of B.
     * @param x5 = ldc leading dimension of C.
 **/
-.type matmul_64_48_64, %function
-.global matmul_64_48_64
-matmul_64_48_64:
+.type matmul_64_48_64_1, %function
+.global matmul_64_48_64_1
+matmul_64_48_64_1:
     
     // Procedural Call Standard
     // save frame pointer and link register
@@ -191,4 +191,4 @@ matmul_loop_over_K:
     // ldp fp, lr, [sp], #16
 
     ret
-    .size matmul_64_48_64, (. - matmul_64_48_64)
+    .size matmul_64_48_64_1, (. - matmul_64_48_64_1)

Original file line number	Diff line number	Diff line change
`@@ -29,18 +29,18 @@ template <uint32_t TMdim, uint32_t TNdim, uint32_t TKdim, uint32_t TBatchDim> cl`
`29`	`29`	`}`
`30`	`30`	`};`
`31`	`31`
`32`		`-BENCHMARK_TEMPLATE_DEFINE_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64, 64, 48, 64, 1)(benchmark::State &state)`
	`32`	`+BENCHMARK_TEMPLATE_DEFINE_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64_1, 64, 48, 64, 1)(benchmark::State &state)`
`33`	`33`	`{`
`34`	`34`	`for (auto _ : state)`
`35`	`35`	`{`
`36`		`- matmul_64_48_64(matrix_a, matrix_b, matrix_c, lda, ldb, ldc);`
	`36`	`+ matmul_64_48_64_1(matrix_a, matrix_b, matrix_c, lda, ldb, ldc);`
`37`	`37`	`}`
`38`	`38`
`39`	`39`	`flops = (64 * 48 * 64) * 2; // M * N * K * 2 instructions (add & mul)`
`40`	`40`	`flops *= state.iterations();`
`41`	`41`	`};`
`42`	`42`
`43`		`-BENCHMARK_REGISTER_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64)->MinWarmUpTime(1.0); // WarmUp in seconds`
	`43`	`+BENCHMARK_REGISTER_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64_1)->MinWarmUpTime(1.0); // WarmUp in seconds`
`44`	`44`
`45`	`45`	`BENCHMARK_TEMPLATE_DEFINE_F(GemmMxNxKxBatchFixture, BM_matmul_64_48_64_16, 64, 48, 64, 16)(benchmark::State &state)`
`46`	`46`	`{`