From e40c006e52652eba9c7478164f7a53d4df4c908d Mon Sep 17 00:00:00 2001
From: wangpengcheng
Date: Fri, 8 May 2026 08:38:49 +0000
Subject: [PATCH] issue/1155 - add topksoftmax interface of infinicore

---
Review notes (commentary placed after the three-dash marker; not part of the commit message):
 * op::topksoftmax(values, indices, x, topk, norm = 0) fills the caller-provided
   `values` and `indices` tensors. The three tensors are asserted to live on the
   same device and dispatch is keyed on the device type of `values`.
 * The infiniop backend builds its cached descriptor from x->desc() alone (the
   cache seed hashes values, indices and x); topk and norm are forwarded to
   infiniopTopksoftmax on every run.
 * The Python test uses contiguous inputs only (strides None) and compares with a
   torch reference: softmax in float32 -> topk -> optional renormalisation.
 * NOTE(review): the From: header carries no <address> part; confirm against the
   original commit before feeding this file to `git am`.

 include/infinicore/ops.hpp                    |   1 +
 include/infinicore/ops/topksoftmax.hpp        |  14 ++
 src/infinicore/ops/topksoftmax/topksoftmax.cc |  30 ++++
 .../ops/topksoftmax/topksoftmax_infiniop.cc   |  56 +++++++
 src/infinicore/pybind11/ops.hpp               |   2 +
 src/infinicore/pybind11/ops/topksoftmax.hpp   |  32 ++++
 test/infinicore/ops/topksoftmax.py            | 147 ++++++++++++++++++
 7 files changed, 282 insertions(+)
 create mode 100644 include/infinicore/ops/topksoftmax.hpp
 create mode 100644 src/infinicore/ops/topksoftmax/topksoftmax.cc
 create mode 100644 src/infinicore/ops/topksoftmax/topksoftmax_infiniop.cc
 create mode 100644 src/infinicore/pybind11/ops/topksoftmax.hpp
 create mode 100644 test/infinicore/ops/topksoftmax.py

diff --git a/include/infinicore/ops.hpp b/include/infinicore/ops.hpp
index 6019c3a9b..832f48683 100644
--- a/include/infinicore/ops.hpp
+++ b/include/infinicore/ops.hpp
@@ -45,3 +45,4 @@
 #include "ops/silu_and_mul.hpp"
 #include "ops/softmax.hpp"
 #include "ops/swiglu.hpp"
+#include "ops/topksoftmax.hpp"
diff --git a/include/infinicore/ops/topksoftmax.hpp b/include/infinicore/ops/topksoftmax.hpp
new file mode 100644
index 000000000..2cd3ac7c5
--- /dev/null
+++ b/include/infinicore/ops/topksoftmax.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+#include "../device.hpp"
+#include "../graph/graph.hpp"
+#include "../tensor.hpp"
+#include "common/op.hpp"
+
+namespace infinicore::op {
+
+INFINICORE_GRAPH_OP_CLASS(Topksoftmax, Tensor, Tensor, const Tensor &, const size_t, const int);
+
+void topksoftmax(Tensor values, Tensor indices, const Tensor &x, const size_t topk, const int norm = 0);
+
+} // namespace infinicore::op
diff --git a/src/infinicore/ops/topksoftmax/topksoftmax.cc b/src/infinicore/ops/topksoftmax/topksoftmax.cc
new file mode 100644
index 000000000..c0572ca83
--- /dev/null
+++ b/src/infinicore/ops/topksoftmax/topksoftmax.cc
@@ -0,0 +1,30 @@
+#include "infinicore/ops/topksoftmax.hpp"
+
+#include "../../utils.hpp"
+
+namespace infinicore::op {
+
+INFINICORE_GRAPH_OP_DISPATCHERS_IMPL(Topksoftmax);
+
+Topksoftmax::Topksoftmax(Tensor values,
+                         Tensor indices,
+                         const Tensor &x,
+                         const size_t topk,
+                         const int norm) {
+    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(values, indices, x);
+    INFINICORE_GRAPH_OP_DISPATCH(values->device().getType(), values, indices, x, topk, norm);
+}
+
+void Topksoftmax::execute(Tensor values,
+                          Tensor indices,
+                          const Tensor &x,
+                          const size_t topk,
+                          const int norm) {
+    INFINICORE_GRAPH_OP_RECORD_OR_RUN(Topksoftmax, values, indices, x, topk, norm);
+}
+
+void topksoftmax(Tensor values, Tensor indices, const Tensor &x, const size_t topk, const int norm) {
+    Topksoftmax::execute(values, indices, x, topk, norm);
+}
+
+} // namespace infinicore::op
diff --git a/src/infinicore/ops/topksoftmax/topksoftmax_infiniop.cc b/src/infinicore/ops/topksoftmax/topksoftmax_infiniop.cc
new file mode 100644
index 000000000..1c016538c
--- /dev/null
+++ b/src/infinicore/ops/topksoftmax/topksoftmax_infiniop.cc
@@ -0,0 +1,56 @@
+#include "infinicore/ops/topksoftmax.hpp"
+
+#include "../infiniop_impl.hpp"
+
+namespace infinicore::op::topksoftmax_impl::infiniop {
+
+INFINIOP_CACHABLE_DESCRIPTOR(Descriptor, Topksoftmax, 100);
+
+struct PlannedMeta {
+    std::shared_ptr<Descriptor> descriptor;
+    graph::GraphTensor workspace, values, indices, x;
+    size_t topk;
+    int norm;
+};
+
+void *plan(Tensor values, Tensor indices, const Tensor &x, const size_t topk, const int norm) {
+    size_t seed = hash_combine(values, indices, x);
+
+    INFINIOP_CACHABLE_DESCRIPTOR_GET_OR_CREATE(
+        Descriptor, descriptor, Topksoftmax, seed, x->desc());
+
+    INFINIOP_WORKSPACE_TENSOR(workspace, Topksoftmax, descriptor);
+
+    return new PlannedMeta{
+        descriptor,
+        graph::GraphTensor(workspace),
+        graph::GraphTensor(values),
+        graph::GraphTensor(indices),
+        graph::GraphTensor(x),
+        topk,
+        norm};
+}
+
+void run(void *planned_meta) {
+    auto planned = reinterpret_cast<PlannedMeta *>(planned_meta);
+
+    INFINICORE_CHECK_ERROR(infiniopTopksoftmax(
+        planned->descriptor->desc,
+        planned->workspace->data(),
+        planned->workspace->numel(),
+        planned->values->data(),
+        planned->indices->data(),
+        planned->x->data(),
+        planned->topk,
+        planned->norm,
+        context::getStream()));
+}
+
+void cleanup(void **planned_meta_ptr) {
+    delete *reinterpret_cast<PlannedMeta **>(planned_meta_ptr);
+    *planned_meta_ptr = nullptr;
+}
+
+INFINICORE_GRAPH_OP_REGISTER_ALLDEVICE(Topksoftmax, &plan, &run, cleanup);
+
+} // namespace infinicore::op::topksoftmax_impl::infiniop
diff --git a/src/infinicore/pybind11/ops.hpp b/src/infinicore/pybind11/ops.hpp
index c9c780aad..383429f8f 100644
--- a/src/infinicore/pybind11/ops.hpp
+++ b/src/infinicore/pybind11/ops.hpp
@@ -97,6 +97,7 @@
 #include "ops/tan.hpp"
 #include "ops/tanhshrink.hpp"
 #include "ops/topk.hpp"
+#include "ops/topksoftmax.hpp"
 #include "ops/triplet_margin_loss.hpp"
 #include "ops/triplet_margin_with_distance_loss.hpp"
 #include "ops/unfold.hpp"
@@ -218,6 +219,7 @@ inline void bind(py::module &m) {
     bind_selu(m);
     bind_sinh(m);
     bind_layer_norm(m);
+    bind_topksoftmax(m);
 }
 
 } // namespace infinicore::ops
diff --git a/src/infinicore/pybind11/ops/topksoftmax.hpp b/src/infinicore/pybind11/ops/topksoftmax.hpp
new file mode 100644
index 000000000..ca93edff0
--- /dev/null
+++ b/src/infinicore/pybind11/ops/topksoftmax.hpp
@@ -0,0 +1,32 @@
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+#include "infinicore/ops/topksoftmax.hpp"
+
+namespace py = pybind11;
+
+namespace infinicore::ops {
+
+inline void bind_topksoftmax(py::module &m) {
+    m.def("topksoftmax",
+          &op::topksoftmax,
+          py::arg("values"),
+          py::arg("indices"),
+          py::arg("x"),
+          py::arg("topk"),
+          py::arg("norm") = 0,
+          R"doc(In-place Top-k Softmax.
+
+Writes results to pre-allocated values and indices tensors.
+
+Args:
+    values: Output tensor for softmax weights [N, topk]
+    indices: Output tensor for selected indices [N, topk], int32
+    x: Input tensor [N, width], router logits
+    topk: Number of top values to select
+    norm: Whether to re-normalize top-k probabilities (1=yes, 0=no), default 0
+)doc");
+}
+
+} // namespace infinicore::ops
diff --git a/test/infinicore/ops/topksoftmax.py b/test/infinicore/ops/topksoftmax.py
new file mode 100644
index 000000000..76582ecf0
--- /dev/null
+++ b/test/infinicore/ops/topksoftmax.py
@@ -0,0 +1,147 @@
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+import torch
+import torch.nn.functional as F
+from framework import (
+    BaseOperatorTest,
+    GenericTestRunner,
+    TensorInitializer,
+    TensorSpec,
+    TestCase,
+    is_broadcast,
+)
+from infinicore.lib import _infinicore
+
+import infinicore
+
+# (input_shape, input_strides, topk, norm) — norm is 0/1 for C++ binding (infiniop bool).
+# Strides None only: kernel path matches contiguous layouts as in test/infiniop/topksoftmax.py.
+_TEST_CASES_DATA = [
+    ((1, 10), None, 7, 1),
+    ((8, 20), None, 4, 1),
+    ((2, 64), None, 6, 1),
+    ((4, 16), None, 3, 0),
+]
+
+_TOLERANCE_MAP = {
+    infinicore.float16: {"atol": 1e-3, "rtol": 1e-3},
+    infinicore.float32: {"atol": 1e-3, "rtol": 1e-3},
+    infinicore.bfloat16: {"atol": 1e-3, "rtol": 1e-3},
+}
+
+_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
+
+
+def torch_topksoftmax(router_logits, top_k, norm_topk_prob=False):
+    """Reference implementation aligned with test/infiniop/topksoftmax.py."""
+    routing_weights = F.softmax(router_logits, dim=-1, dtype=torch.float32)
+    routing_weights, selected_experts = torch.topk(routing_weights, top_k, dim=-1)
+    if norm_topk_prob:
+        routing_weights = routing_weights / routing_weights.sum(dim=-1, keepdim=True)
+    return routing_weights, selected_experts.to(torch.int32)
+
+
+def parse_test_cases():
+    test_cases = []
+    for shape, in_strides, topk, norm in _TEST_CASES_DATA:
+        for dtype in _TENSOR_DTYPES:
+            tol = _TOLERANCE_MAP.get(dtype, {"atol": 1e-3, "rtol": 1e-3})
+            base = (
+                torch.arange(0, shape[0] * shape[1], dtype=torch.float32)
+                .reshape(shape)
+                * 0.5
+            )
+            input_spec = TensorSpec.from_tensor(
+                shape,
+                in_strides,
+                dtype,
+                init_mode=TensorInitializer.MANUAL,
+                set_tensor=base,
+            )
+            n = shape[0]
+            out_shape = (n, topk)
+
+            desc_parts = [f"topk={topk}", f"norm={norm}"]
+            if in_strides:
+                desc_parts.append(f"strides={in_strides}")
+            suffix = ", ".join(desc_parts)
+
+            kwargs = {"topk": topk, "norm": norm}
+
+            test_cases.append(
+                TestCase(
+                    inputs=[input_spec],
+                    kwargs=kwargs,
+                    output_spec=None,
+                    comparison_target=None,
+                    tolerance=tol,
+                    description=f"topksoftmax - OUT_OF_PLACE - {suffix}",
+                    output_count=2,
+                )
+            )
+
+            values_spec = TensorSpec.from_tensor(out_shape, None, infinicore.float32)
+            indices_spec = TensorSpec.from_tensor(out_shape, None, infinicore.int32)
+
+            if not is_broadcast(values_spec.strides) and not is_broadcast(
+                indices_spec.strides
+            ):
+                test_cases.append(
+                    TestCase(
+                        inputs=[input_spec],
+                        kwargs=kwargs.copy(),
+                        output_specs=[values_spec, indices_spec],
+                        comparison_target="out",
+                        tolerance=tol,
+                        description=f"topksoftmax - INPLACE(out) - {suffix}",
+                        output_count=2,
+                    )
+                )
+
+    return test_cases
+
+
+class OpTest(BaseOperatorTest):
+    def __init__(self):
+        super().__init__("topksoftmax")
+
+    def get_test_cases(self):
+        return parse_test_cases()
+
+    def torch_operator(self, x, topk, norm=0, out=None, **kwargs):
+        norm_bool = norm != 0
+        values, indices = torch_topksoftmax(x, topk, norm_bool)
+        if out is not None:
+            out_v, out_i = out
+            out_v.copy_(values)
+            out_i.copy_(indices)
+        return values, indices
+
+    def infinicore_operator(self, x, topk, norm=0, out=None, **kwargs):
+        n = x.shape[0]
+        if out is None:
+            values = infinicore.empty(
+                (n, topk), dtype=infinicore.float32, device=x.device
+            )
+            indices = infinicore.empty(
+                (n, topk), dtype=infinicore.int32, device=x.device
+            )
+        else:
+            values, indices = out[0], out[1]
+
+        _infinicore.topksoftmax(
+            values._underlying, indices._underlying, x._underlying, topk, int(norm)
+        )
+        return values, indices
+
+
+def main():
+    runner = GenericTestRunner(OpTest)
+    runner.run_and_exit()
+
+
+if __name__ == "__main__":
+    main()