mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-01 00:33:09 +00:00
R600/SI: Use V_ADD_F32 instead of V_MOV_B32 for clamp/neg/abs modifiers.
The modifiers don't seem to have any effect with V_MOV_B32; supposedly it's meant to just move bits untouched. Fixes 46 piglit tests with radeonsi, though unfortunately 11 of those had just regressed because they started using the clamp modifier. NOTE: This is a candidate for the Mesa stable branch. Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174890 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
117de489a0
commit
311ea66db1
@ -74,13 +74,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
|||||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||||
case AMDGPU::BRANCH: return BB;
|
case AMDGPU::BRANCH: return BB;
|
||||||
case AMDGPU::CLAMP_SI:
|
case AMDGPU::CLAMP_SI:
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
|
||||||
.addOperand(MI->getOperand(0))
|
.addOperand(MI->getOperand(0))
|
||||||
.addOperand(MI->getOperand(1))
|
.addOperand(MI->getOperand(1))
|
||||||
// VSRC1-2 are unused, but we still need to fill all the
|
.addReg(AMDGPU::SREG_LIT_0)
|
||||||
// operand slots, so we just reuse the VSRC0 operand
|
.addReg(AMDGPU::SREG_LIT_0)
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // ABS
|
.addImm(0) // ABS
|
||||||
.addImm(1) // CLAMP
|
.addImm(1) // CLAMP
|
||||||
.addImm(0) // OMOD
|
.addImm(0) // OMOD
|
||||||
@ -89,13 +87,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case AMDGPU::FABS_SI:
|
case AMDGPU::FABS_SI:
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
|
||||||
.addOperand(MI->getOperand(0))
|
.addOperand(MI->getOperand(0))
|
||||||
.addOperand(MI->getOperand(1))
|
.addOperand(MI->getOperand(1))
|
||||||
// VSRC1-2 are unused, but we still need to fill all the
|
.addReg(AMDGPU::SREG_LIT_0)
|
||||||
// operand slots, so we just reuse the VSRC0 operand
|
.addReg(AMDGPU::SREG_LIT_0)
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(1) // ABS
|
.addImm(1) // ABS
|
||||||
.addImm(0) // CLAMP
|
.addImm(0) // CLAMP
|
||||||
.addImm(0) // OMOD
|
.addImm(0) // OMOD
|
||||||
@ -104,13 +100,11 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case AMDGPU::FNEG_SI:
|
case AMDGPU::FNEG_SI:
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_ADD_F32_e64))
|
||||||
.addOperand(MI->getOperand(0))
|
.addOperand(MI->getOperand(0))
|
||||||
.addOperand(MI->getOperand(1))
|
.addOperand(MI->getOperand(1))
|
||||||
// VSRC1-2 are unused, but we still need to fill all the
|
.addReg(AMDGPU::SREG_LIT_0)
|
||||||
// operand slots, so we just reuse the VSRC0 operand
|
.addReg(AMDGPU::SREG_LIT_0)
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // ABS
|
.addImm(0) // ABS
|
||||||
.addImm(0) // CLAMP
|
.addImm(0) // CLAMP
|
||||||
.addImm(0) // OMOD
|
.addImm(0) // OMOD
|
||||||
|
Loading…
Reference in New Issue
Block a user