mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-10-02 17:55:18 +00:00
AMDGPU: really don't commute REV opcodes if the target variant doesn't exist
If pseudoToMCOpcode failed, commuteOpcode would return the original opcode, so the operands would be swapped but the instruction would remain the same. This resulted in LSHLREV a, b ---> LSHLREV b, a. This fixes Glamor text rendering and piglit/arb_sample_shading-builtin-gl-sample-mask on VI. This is a candidate for stable branches. v2: the test was simplified by Tom Stellard. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240824 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
78c5b7fe3c
commit
e874345be4
@ -440,22 +440,22 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
|
int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
|
||||||
const unsigned Opcode = MI.getOpcode();
|
const unsigned Opcode = MI.getOpcode();
|
||||||
|
|
||||||
int NewOpc;
|
int NewOpc;
|
||||||
|
|
||||||
// Try to map original to commuted opcode
|
// Try to map original to commuted opcode
|
||||||
NewOpc = AMDGPU::getCommuteRev(Opcode);
|
NewOpc = AMDGPU::getCommuteRev(Opcode);
|
||||||
|
if (NewOpc != -1)
|
||||||
// Check if the commuted (REV) opcode exists on the target.
|
// Check if the commuted (REV) opcode exists on the target.
|
||||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
|
||||||
return NewOpc;
|
|
||||||
|
|
||||||
// Try to map commuted to original opcode
|
// Try to map commuted to original opcode
|
||||||
NewOpc = AMDGPU::getCommuteOrig(Opcode);
|
NewOpc = AMDGPU::getCommuteOrig(Opcode);
|
||||||
|
if (NewOpc != -1)
|
||||||
// Check if the original (non-REV) opcode exists on the target.
|
// Check if the original (non-REV) opcode exists on the target.
|
||||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1;
|
||||||
return NewOpc;
|
|
||||||
|
|
||||||
return Opcode;
|
return Opcode;
|
||||||
}
|
}
|
||||||
@ -771,6 +771,10 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
|||||||
if (MI->getNumOperands() < 3)
|
if (MI->getNumOperands() < 3)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
|
int CommutedOpcode = commuteOpcode(*MI);
|
||||||
|
if (CommutedOpcode == -1)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
|
||||||
AMDGPU::OpName::src0);
|
AMDGPU::OpName::src0);
|
||||||
assert(Src0Idx != -1 && "Should always have src0 operand");
|
assert(Src0Idx != -1 && "Should always have src0 operand");
|
||||||
@ -833,7 +837,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (MI)
|
if (MI)
|
||||||
MI->setDesc(get(commuteOpcode(*MI)));
|
MI->setDesc(get(CommutedOpcode));
|
||||||
|
|
||||||
return MI;
|
return MI;
|
||||||
}
|
}
|
||||||
|
@ -117,7 +117,7 @@ public:
|
|||||||
// register. If there is no hardware instruction that can store to \p
|
// register. If there is no hardware instruction that can store to \p
|
||||||
// DstRC, then AMDGPU::COPY is returned.
|
// DstRC, then AMDGPU::COPY is returned.
|
||||||
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
|
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
|
||||||
unsigned commuteOpcode(const MachineInstr &MI) const;
|
int commuteOpcode(const MachineInstr &MI) const;
|
||||||
|
|
||||||
MachineInstr *commuteInstruction(MachineInstr *MI,
|
MachineInstr *commuteInstruction(MachineInstr *MI,
|
||||||
bool NewMI = false) const override;
|
bool NewMI = false) const override;
|
||||||
|
33
test/CodeGen/AMDGPU/commute-shifts.ll
Normal file
33
test/CodeGen/AMDGPU/commute-shifts.ll
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
||||||
|
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
||||||
|
|
||||||
|
; GCN-LABEL: {{^}}main:
|
||||||
|
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
|
||||||
|
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
|
||||||
|
|
||||||
|
define void @main() #0 {
|
||||||
|
main_body:
|
||||||
|
%0 = fptosi float undef to i32
|
||||||
|
%1 = call <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32> undef, <32 x i8> undef, i32 2)
|
||||||
|
%2 = extractelement <4 x i32> %1, i32 0
|
||||||
|
%3 = and i32 %0, 7
|
||||||
|
%4 = shl i32 1, %3
|
||||||
|
%5 = and i32 %2, %4
|
||||||
|
%6 = icmp eq i32 %5, 0
|
||||||
|
%.10 = select i1 %6, float 0.000000e+00, float undef
|
||||||
|
%7 = call i32 @llvm.SI.packf16(float undef, float %.10)
|
||||||
|
%8 = bitcast i32 %7 to float
|
||||||
|
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %8, float undef, float %8)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readnone
|
||||||
|
declare <4 x i32> @llvm.SI.imageload.v4i32(<4 x i32>, <32 x i8>, i32) #1
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readnone
|
||||||
|
declare i32 @llvm.SI.packf16(float, float) #1
|
||||||
|
|
||||||
|
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||||
|
|
||||||
|
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
|
||||||
|
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue
Block a user