mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-28 06:32:09 +00:00
Combine fmul vector FP constants when unsafe math is allowed.
This is an extension of the change made with r215820:
http://llvm.org/viewvc/llvm-project?view=revision&revision=215820

That patch allowed combining of splatted vector FP constants that are multiplied. This patch allows combining non-uniform vector FP constants too by relaxing the check on the type of vector.

Also, canonicalize a vector fmul in the same way that we already do for scalars: if only one operand of the fmul is a constant, make it operand 1. Otherwise, we miss potential folds.

This fold is also done by -instcombine, but it's possible that extra fmuls may have been generated during lowering.

Differential Revision: http://reviews.llvm.org/D5254

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217599 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
338692246d
commit
04bb0e721f
@ -6820,8 +6820,16 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
|
||||
|
||||
// fold vector ops
|
||||
if (VT.isVector()) {
|
||||
// This just handles C1 * C2 for vectors. Other vector folds are below.
|
||||
SDValue FoldedVOp = SimplifyVBinOp(N);
|
||||
if (FoldedVOp.getNode()) return FoldedVOp;
|
||||
if (FoldedVOp.getNode())
|
||||
return FoldedVOp;
|
||||
// Canonicalize vector constant to RHS.
|
||||
if (N0.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
N1.getOpcode() != ISD::BUILD_VECTOR)
|
||||
if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
|
||||
if (BV0->isConstant())
|
||||
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
|
||||
}
|
||||
|
||||
// fold (fmul c1, c2) -> c1*c2
|
||||
@ -6842,11 +6850,19 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
|
||||
return N1;
|
||||
|
||||
// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
|
||||
if (N1CFP && N0.getOpcode() == ISD::FMUL &&
|
||||
N0.getNode()->hasOneUse() && isConstOrConstSplatFP(N0.getOperand(1))) {
|
||||
SDLoc SL(N);
|
||||
SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(1), N1);
|
||||
return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
|
||||
if (N0.getOpcode() == ISD::FMUL) {
|
||||
// Fold scalars or any vector constants (not just splats).
|
||||
// This fold is done in general by InstCombine, but extra fmul insts
|
||||
// may have been generated during lowering.
|
||||
SDValue N01 = N0.getOperand(1);
|
||||
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
|
||||
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
|
||||
if ((N1CFP && isConstOrConstSplatFP(N01)) ||
|
||||
(BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
|
||||
SDLoc SL(N);
|
||||
SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
|
||||
return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
|
||||
}
|
||||
}
|
||||
|
||||
// fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
|
||||
|
@ -55,6 +55,54 @@ define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
; We should be able to pre-multiply the two constant vectors.
|
||||
; CHECK: ## float 5.000000e+00
|
||||
; CHECK: ## float 1.200000e+01
|
||||
; CHECK: ## float 2.100000e+01
|
||||
; CHECK: ## float 3.200000e+01
|
||||
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
|
||||
; CHECK: mulps
|
||||
; CHECK-NOT: mulps
|
||||
; CHECK-NEXT: ret
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
|
||||
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
; Same as above, but reverse operands to make sure non-canonical form is also handled.
|
||||
; CHECK: ## float 5.000000e+00
|
||||
; CHECK: ## float 1.200000e+01
|
||||
; CHECK: ## float 2.100000e+01
|
||||
; CHECK: ## float 3.200000e+01
|
||||
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
|
||||
; CHECK: mulps
|
||||
; CHECK-NOT: mulps
|
||||
; CHECK-NEXT: ret
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 {
|
||||
%y = fmul <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
|
||||
%z = fmul <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %y
|
||||
ret <4 x float> %z
|
||||
}
|
||||
|
||||
; More than one use of a constant multiply should not inhibit the optimization.
|
||||
; Instead of a chain of 2 dependent mults, this test will have 2 independent mults.
|
||||
; CHECK: ## float 5.000000e+00
|
||||
; CHECK: ## float 1.200000e+01
|
||||
; CHECK: ## float 2.100000e+01
|
||||
; CHECK: ## float 3.200000e+01
|
||||
; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
|
||||
; CHECK: mulps
|
||||
; CHECK: mulps
|
||||
; CHECK: addps
|
||||
; CHECK: ret
|
||||
define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
|
||||
%y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
%a = fadd <4 x float> %y, %z
|
||||
ret <4 x float> %a
|
||||
}
|
||||
|
||||
; CHECK-LABEL: fmul_c2_c4_f32:
|
||||
; CHECK-NOT: addss
|
||||
; CHECK: mulss
|
||||
|
Loading…
x
Reference in New Issue
Block a user