diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 466d24771ce..da8aa5b7a98 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -1323,6 +1323,8 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
   // e64 version if available, -1 otherwise
   int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
   const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
+  // src_modifier value emitted for each source when promoting to e64
+  int InputModifiers[3] = {0};
 
   assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
 
@@ -1399,7 +1401,10 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
       }
     }
 
-    if (DescE64 && !Immediate) {
+    if (Immediate)
+      continue;
+
+    if (DescE64) {
 
       // Test if it makes sense to switch to e64 encoding
       unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
@@ -1418,6 +1423,30 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
         DescE64 = nullptr;
       }
     }
+
+    if (!DescE64 && !Promote2e64)
+      continue;
+    if (!Operand.isMachineOpcode())
+      continue;
+
+    // Fold an FNEG_SI/FABS_SI feeding this source into the src_modifier of
+    // the operand (1 is recorded for FNEG_SI, 2 for FABS_SI) and use the
+    // input of that node directly.
+    const unsigned SrcOpcode = Operand.getMachineOpcode();
+    if (SrcOpcode != AMDGPU::FNEG_SI && SrcOpcode != AMDGPU::FABS_SI)
+      continue;
+
+    assert(i < 3 && "operand index exceeds the InputModifiers array");
+    Ops.back() = Operand.getOperand(0);
+    InputModifiers[i] = SrcOpcode == AMDGPU::FNEG_SI ? 1 : 2;
+    Promote2e64 = true;
+
+    // Switch to the e64 descriptor unless an earlier operand already
+    // consumed it.
+    if (DescE64) {
+      Desc = DescE64;
+      DescE64 = nullptr;
+    }
   }
 
   if (Promote2e64) {
@@ -1425,7 +1454,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
     Ops.clear();
     for (unsigned i = 0; i < OldOps.size(); ++i) {
       // src_modifier
-      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
+      Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
       Ops.push_back(OldOps[i]);
     }
     // Add the modifier flags while promoting
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index 2cd3a4f604f..b87ce225409 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -49,6 +49,17 @@ entry:
   ret void
 }
 
+; SI-CHECK-LABEL: @fabs_fold
+; SI-CHECK-NOT: V_AND_B32
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
+define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
+entry:
+  %0 = call float @fabs(float %in0)
+  %1 = fmul float %0, %in1
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
 declare float @fabs(float ) readnone
 declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
 declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index 7ad760cadbb..4cddc737895 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -59,3 +59,14 @@ entry:
   store float %1, float addrspace(1)* %out
   ret void
 }
+
+; SI-CHECK-LABEL: @fneg_fold
+; SI-CHECK-NOT: V_XOR_B32
+; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
+define void @fneg_fold(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = fsub float -0.0, %in
+  %1 = fmul float %0, %in
+  store float %1, float addrspace(1)* %out
+  ret void
+}