Fix reverse NVVM barrier calls for LLVM 21+ CUDA intrinsics#2785
Fix reverse NVVM barrier calls for LLVM 21+ CUDA intrinsics#2785
Conversation
Agent-Logs-Url: https://github.com/EnzymeAD/Enzyme/sessions/2c2c6db1-8a94-4691-a34c-569c8949f747 Co-authored-by: minansys <149007967+minansys@users.noreply.github.com>
minansys
left a comment
There was a problem hiding this comment.
change "#if LLVM_VERSION_MAJOR > 20
auto BarrierInst = Arch == Triple::amdgcn
? (llvm::Intrinsic::ID)Intrinsic::amdgcn_s_barrier
: (llvm::Intrinsic::ID)
Intrinsic::nvvm_barrier_cta_sync_aligned_all;
#else
auto BarrierInst = Arch == Triple::amdgcn
? (llvm::Intrinsic::ID)Intrinsic::amdgcn_s_barrier
: (llvm::Intrinsic::ID)Intrinsic::nvvm_barrier0;
#endif
instbuilder.CreateCall(
getIntrinsicDeclaration(gutils->newFunc->getParent(), BarrierInst),
{});" to "#if LLVM_VERSION_MAJOR > 20
auto BarrierInst = Arch == Triple::amdgcn
? (llvm::Intrinsic::ID)Intrinsic::amdgcn_s_barrier
: (llvm::Intrinsic::ID)
Intrinsic::nvvm_barrier_cta_sync_aligned_all;
#else
auto BarrierInst = Arch == Triple::amdgcn
? (llvm::Intrinsic::ID)Intrinsic::amdgcn_s_barrier
: (llvm::Intrinsic::ID)Intrinsic::nvvm_barrier0;
#endif
SmallVector<Value *, 1> BarrierArgs;
#if LLVM_VERSION_MAJOR > 20
if (Arch != Triple::amdgcn) {
BarrierArgs.push_back(
ConstantInt::get(Type::getInt32Ty(gutils->newFunc->getContext()),
0));
}
#endif
instbuilder.CreateCall(
getIntrinsicDeclaration(gutils->newFunc->getParent(), BarrierInst),
BarrierArgs);" in Enzyme/EnzymeLogic.cpp 2) use clang format to format it to pass the format requirement "DoozyX/clang-format-lint-action@v0.20
with:
source: enzyme/Enzyme enzyme/tools/enzyme-tblgen
exclude: ./CMakeLists.txt enzyme/Enzyme/SCEV
style: llvm
clangFormatVersion: 16
extensions: c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx
inplace: false"
Agent-Logs-Url: https://github.com/EnzymeAD/Enzyme/sessions/0b1ed8c1-4684-4ccb-aa22-f3c5a8317d93 Co-authored-by: minansys <149007967+minansys@users.noreply.github.com>
Addressed in 078c6be. |
superustc
left a comment
There was a problem hiding this comment.
fix this formatting issue "--- enzyme/Enzyme/AdjointGenerator.h (original)
+++ enzyme/Enzyme/AdjointGenerator.h (reformatted)
@@ -4013,8 +4013,8 @@
case Intrinsic::nvvm_barrier0_or: {
SmallVector<Value *, 1> args = {
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0)};
-
auto *Fn = -
getIntrinsicDeclaration(M, Intrinsic::nvvm_barrier_cta_sync_aligned_all);
-
auto *Fn = getIntrinsicDeclaration( -
M, Intrinsic::nvvm_barrier_cta_sync_aligned_all); auto cal = cast<CallInst>(Builder2.CreateCall(Fn, args)); cal->setCallingConv(Fn->getCallingConv()); cal->setDebugLoc(gutils->getNewFromOriginal(I.getDebugLoc()));
@@ -4025,8 +4025,8 @@
case Intrinsic::nvvm_barrier_cta_red_or_aligned_all:
case Intrinsic::nvvm_barrier_cta_red_popc_aligned_all: {
SmallVector<Value *, 1> args = {I.getOperand(0)};
-
auto *Fn = -
getIntrinsicDeclaration(M, Intrinsic::nvvm_barrier_cta_sync_aligned_all);
-
auto *Fn = getIntrinsicDeclaration( -
M, Intrinsic::nvvm_barrier_cta_sync_aligned_all); auto cal = cast<CallInst>(Builder2.CreateCall(Fn, args)); cal->setCallingConv(Fn->getCallingConv()); cal->setDebugLoc(gutils->getNewFromOriginal(I.getDebugLoc()));"
minansys
left a comment
There was a problem hiding this comment.
fix this clang format issue "--- enzyme/Enzyme/AdjointGenerator.h (original)
+++ enzyme/Enzyme/AdjointGenerator.h (reformatted)
@@ -4013,8 +4013,8 @@
case Intrinsic::nvvm_barrier0_or: {
SmallVector<Value *, 1> args = {
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0)};
-
auto *Fn = -
getIntrinsicDeclaration(M, Intrinsic::nvvm_barrier_cta_sync_aligned_all);
-
auto *Fn = getIntrinsicDeclaration( -
M, Intrinsic::nvvm_barrier_cta_sync_aligned_all); auto cal = cast<CallInst>(Builder2.CreateCall(Fn, args)); cal->setCallingConv(Fn->getCallingConv()); cal->setDebugLoc(gutils->getNewFromOriginal(I.getDebugLoc()));
@@ -4025,8 +4025,8 @@
case Intrinsic::nvvm_barrier_cta_red_or_aligned_all:
case Intrinsic::nvvm_barrier_cta_red_popc_aligned_all: {
SmallVector<Value *, 1> args = {I.getOperand(0)};
-
auto *Fn = -
getIntrinsicDeclaration(M, Intrinsic::nvvm_barrier_cta_sync_aligned_all);
-
auto *Fn = getIntrinsicDeclaration( -
M, Intrinsic::nvvm_barrier_cta_sync_aligned_all); auto cal = cast<CallInst>(Builder2.CreateCall(Fn, args)); cal->setCallingConv(Fn->getCallingConv()); cal->setDebugLoc(gutils->getNewFromOriginal(I.getDebugLoc()));"
minansys
left a comment
There was a problem hiding this comment.
fix the clang format issue "--- enzyme/Enzyme/AdjointGenerator.h (original)
+++ enzyme/Enzyme/AdjointGenerator.h (reformatted)
@@ -4013,8 +4013,8 @@
case Intrinsic::nvvm_barrier0_or: {
SmallVector<Value *, 1> args = {
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0)};
-
auto *Fn = -
getIntrinsicDeclaration(M, Intrinsic::nvvm_barrier_cta_sync_aligned_all);
-
auto *Fn = getIntrinsicDeclaration( -
M, Intrinsic::nvvm_barrier_cta_sync_aligned_all); auto cal = cast<CallInst>(Builder2.CreateCall(Fn, args)); cal->setCallingConv(Fn->getCallingConv()); cal->setDebugLoc(gutils->getNewFromOriginal(I.getDebugLoc()));
@@ -4025,8 +4025,8 @@
case Intrinsic::nvvm_barrier_cta_red_or_aligned_all:
case Intrinsic::nvvm_barrier_cta_red_popc_aligned_all: {
SmallVector<Value *, 1> args = {I.getOperand(0)};
-
auto *Fn = -
getIntrinsicDeclaration(M, Intrinsic::nvvm_barrier_cta_sync_aligned_all);
-
auto *Fn = getIntrinsicDeclaration( -
M, Intrinsic::nvvm_barrier_cta_sync_aligned_all); auto cal = cast<CallInst>(Builder2.CreateCall(Fn, args)); cal->setCallingConv(Fn->getCallingConv()); cal->setDebugLoc(gutils->getNewFromOriginal(I.getDebugLoc()));"
Agent-Logs-Url: https://github.com/EnzymeAD/Enzyme/sessions/25c353e0-b395-4365-bbd9-3dda55c8f3e1 Co-authored-by: minansys <149007967+minansys@users.noreply.github.com>
Fixed in 01e0b55. |
Already fixed in 01e0b55. |
Enzyme was emitting malformed reverse-mode NVVM barrier calls on newer LLVM/Clang CUDA toolchains. In particular, LLVM 21+
llvm.nvvm.barrier.cta.sync.aligned.* intrinsics require explicit operands, but Enzyme was recreating some reverse barrier calls with an empty argument list.

Reverse-mode NVVM barrier lowering
llvm.nvvm.barrier0() llvm.nvvm.barrier.cta.sync.aligned.all/count with the required barrier id / count operands
barrier.cta.red.*.aligned.{all,count} reductions back to the matching sync intrinsic with the correct operands
barrier0_* reduction intrinsics still use the old operand shape

Version-specific correctness
>20 versions the same
0 when lowering LLVM 21 legacy reduction barriers to sync.aligned.all, matching LLVM’s NVVM upgrade semantics

Regression coverage
llvm.nvvm.barrier0()
llvm.nvvm.barrier.cta.sync.aligned.all(i32)
llvm.nvvm.barrier.cta.sync.aligned.count(i32, i32)

Example of the corrected reverse IR shape on newer LLVM: