Summary of this patch (explanatory text only; patch tools skip everything
before the first "diff --git" header):

  * lib/Target/R600/AMDGPUPromoteAlloca.cpp
      - Adds canVectorizeInst(), which accepts only Load, Store, BitCast and
        AddrSpaceCast instructions.
      - tryPromoteAllocaToVector() now returns false as soon as a direct user
        of the alloca, or a user of one of its GEPs, fails that check, rather
        than reaching llvm_unreachable in the rewrite switch.
      - The rewrite switch additionally accepts AddrSpaceCast, and its
        unreachable message now reports an inconsistency between the two
        instruction lists.
  * test/CodeGen/R600/private-memory-atomics.ll (new)
      - atomicrmw and cmpxchg through a GEP of a private alloca.
  * test/CodeGen/R600/private-memory-broken.ll (new)
      - A call taking a GEP of a private alloca; expects "unsupported call".
  * test/CodeGen/R600/private-memory.ll
      - Adds @select_private: a load through a select of two GEPs.

diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index 053ea8a90b7..218750d445e 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -129,6 +129,22 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
   return GEP->getOperand(2);
 }
 
+// Not an instruction handled below to turn into a vector.
+//
+// TODO: Check isTriviallyVectorizable for calls and handle other
+// instructions.
+static bool canVectorizeInst(Instruction *Inst) {
+  switch (Inst->getOpcode()) {
+  case Instruction::Load:
+  case Instruction::Store:
+  case Instruction::BitCast:
+  case Instruction::AddrSpaceCast:
+    return true;
+  default:
+    return false;
+  }
+}
+
 static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
   Type *AllocaTy = Alloca->getAllocatedType();
 
@@ -149,6 +165,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
   for (User *AllocaUser : Alloca->users()) {
     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
     if (!GEP) {
+      if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+        return false;
+
       WorkList.push_back(AllocaUser);
       continue;
     }
@@ -164,6 +183,9 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
 
     GEPVectorIdx[GEP] = Index;
     for (User *GEPUser : AllocaUser->users()) {
+      if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+        return false;
+
       WorkList.push_back(GEPUser);
     }
   }
@@ -201,12 +223,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
       break;
     }
     case Instruction::BitCast:
+    case Instruction::AddrSpaceCast:
       break;
 
     default:
       Inst->dump();
-      llvm_unreachable("Do not know how to replace this instruction "
-                       "with vector op");
+      llvm_unreachable("Inconsistency in instructions promotable to vector");
     }
   }
   return true;
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
new file mode 100644
index 00000000000..def4f9dee52
--- /dev/null
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s
+
+; This works because promote allocas pass replaces these with LDS atomics.
+
+; Private atomics have no real use, but at least shouldn't crash on it.
+define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+  %tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
+  store i32 %tmp4, i32 addrspace(1)* %out
+  ret void
+}
+
+define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+  %tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
+  %val = extractvalue { i32, i1 } %tmp4, 0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
new file mode 100644
index 00000000000..a5f61637e91
--- /dev/null
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -0,0 +1,20 @@
+; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s 2>&1 | FileCheck %s
+
+; Make sure promote alloca pass doesn't crash
+
+; CHECK: unsupported call
+
+declare i32 @foo(i32*) nounwind
+
+define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+  %val = call i32 @foo(i32* %tmp3) nounwind
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 22bd3589571..89122bec348 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -267,5 +267,19 @@ entry:
   %load = load i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
-
 }
+
+define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
+entry:
+  %tmp = alloca [2 x i32]
+  %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
+  %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+  store i32 0, i32* %tmp1
+  store i32 1, i32* %tmp2
+  %cmp = icmp eq i32 %in, 0
+  %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
+  %load = load i32* %sel
+  store i32 %load, i32 addrspace(1)* %out
+  ret void
+}
+