AMDGPU: really don't commute REV opcodes if the target variant doesn't exist

If pseudoToMCOpcode failed, commuteOpcode returned the original opcode, so the
operands were swapped while the instruction itself stayed the same.
This turned LSHLREV a, b into LSHLREV b, a.

This fixes Glamor text rendering and
piglit/arb_sample_shading-builtin-gl-sample-mask on VI.

This is a candidate for stable branches.

v2: the test was simplified by Tom Stellard

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240824 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Marek Olsak 2015-06-26 20:29:10 +00:00
parent 78c5b7fe3c
commit e874345be4
3 changed files with 46 additions and 9 deletions

View File

@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} }
} }
unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode(); const unsigned Opcode = MI.getOpcode();
int NewOpc; int NewOpc;
// Try to map original to commuted opcode // Try to map original to commuted opcode
NewOpc = AMDGPU::getCommuteRev(Opcode); NewOpc = AMDGPU::getCommuteRev(Opcode);
if (NewOpc != -1)
// Check if the commuted (REV) opcode exists on the target. // Check if the commuted (REV) opcode exists on the target.
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
return NewOpc;
// Try to map commuted to original opcode // Try to map commuted to original opcode
NewOpc = AMDGPU::getCommuteOrig(Opcode); NewOpc = AMDGPU::getCommuteOrig(Opcode);
if (NewOpc != -1)
// Check if the original (non-REV) opcode exists on the target. // Check if the original (non-REV) opcode exists on the target.
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1) return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
return NewOpc;
return Opcode; return Opcode;
} }
@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
if (MI->getNumOperands() < 3) if (MI->getNumOperands() < 3)
return nullptr; return nullptr;
int CommutedOpcode = commuteOpcode(*MI);
if (CommutedOpcode == -1)
return nullptr;
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0); AMDGPU::OpName::src0);
assert(Src0Idx != -1 && "Should always have src0 operand"); assert(Src0Idx != -1 && "Should always have src0 operand");
@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
} }
if (MI) if (MI)
MI->setDesc(get(commuteOpcode(*MI))); MI->setDesc(get(CommutedOpcode));
return MI; return MI;
} }

View File

@ -117,7 +117,7 @@ public:
// register. If there is no hardware instruction that can store to \p // register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned. // DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
unsigned commuteOpcode(const MachineInstr &MI) const; int commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI, MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override; bool NewMI = false) const override;

View File

@ -0,0 +1,33 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}main:
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
; Test function: builds a left shift whose shifted value is the constant 1 and
; whose shift amount is variable (see %4). The check lines above match on the
; shift instruction, with the literal 1 as one of its source operands.
; Most other inputs are undef.
define void @main() #0 {
main_body:
; Shift amount source: an undef float converted to a signed i32.
%0 = fptosi float undef to i32
%1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
; Use only element 0 of the loaded vector.
%2 = extractelement <4 x i32> %1, i32 0
; Mask the shift amount down to its low three bits (0..7).
%3 = and i32 %0, 7
; 1 << %3 -- constant on the left, variable shift amount on the right.
%4 = shl i32 1, %3
; Test whether that single bit is set in the loaded value.
%5 = and i32 %2, %4
%6 = icmp eq i32 %5, 0
; 0.0 when the bit is clear, undef otherwise.
%.10 = select i1 %6, float 0.000000e+00, float undef
%7 = call i32 @llvm.SI.packf16(float undef, float %.10)
%8 = bitcast i32 %7 to float
; The export call is the only consumer of %8 (presumably what keeps the shift
; sequence from being eliminated as dead -- confirm).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
ret void
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }