diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 6f75ff1dbf4..cbd426d516e 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -260,7 +260,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // select, compare, etc.). SwapVector[VecIdx].IsSwappable = 1; break; - case PPC::XXPERMDI: + case PPC::XXPERMDI: { // This is a swap if it is of the form XXPERMDI t, s, s, 2. // Unfortunately, MachineCSE ignores COPY and SUBREG_TO_REG, so we // can also see XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), 2, @@ -268,9 +268,8 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // SUBREG_TO_REG to find the real source value for comparison. // If the real source value is a physical register, then mark the // XXPERMDI as mentioning a physical register. - // Any other form of XXPERMDI is lane-sensitive and unsafe - // for the optimization. - if (MI.getOperand(3).getImm() == 2) { + int immed = MI.getOperand(3).getImm(); + if (immed == 2) { unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), VecIdx); unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), @@ -278,7 +277,26 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (trueReg1 == trueReg2) SwapVector[VecIdx].IsSwap = 1; } + // This is a doubleword splat if it is of the form + // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we + // must look through chains of copy-likes to find the source + // register. We turn off the marking for mention of a physical + // register, because splatting it is safe; the optimization + // will not swap the value in the physical register. 
+ else if (immed == 0 || immed == 3) { + unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), + VecIdx); + unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), + VecIdx); + if (trueReg1 == trueReg2) { + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].MentionsPhysVR = 0; + } + } + // Any other form of XXPERMDI is lane-sensitive and unsafe + // for the optimization. break; + } case PPC::LVX: // Non-permuting loads are currently unsafe. We can use special // handling for this in the future. By not marking these as @@ -307,14 +325,6 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { SwapVector[VecIdx].IsStore = 1; SwapVector[VecIdx].IsSwap = 1; break; - case PPC::SUBREG_TO_REG: - // These are fine provided they are moving between full vector - // register classes. For example, the VRs are a subset of the - // VSRs, but each VR and each VSR is a full 128-bit register. - if (isVecReg(MI.getOperand(0).getReg()) && - isVecReg(MI.getOperand(2).getReg())) - SwapVector[VecIdx].IsSwappable = 1; - break; case PPC::COPY: // These are fine provided they are moving between full vector // register classes. diff --git a/test/CodeGen/PowerPC/swaps-le-3.ll b/test/CodeGen/PowerPC/swaps-le-3.ll new file mode 100644 index 00000000000..0c1748df9fc --- /dev/null +++ b/test/CodeGen/PowerPC/swaps-le-3.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck --implicit-check-not=xxswapd %s + +; Verify that VSX swap removal handles the doubleword splat idiom. No xxswapd +; may appear anywhere in the output (enforced via --implicit-check-not). 
+ +@a = external global <2 x double>, align 16 +@b = external global <2 x double>, align 16 + +define void @test(double %s) { +entry: + %0 = insertelement <2 x double> undef, double %s, i32 0 + %1 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer + %2 = load <2 x double>, <2 x double>* @a, align 16 + %3 = fadd <2 x double> %0, %2 + store <2 x double> %3, <2 x double>* @b, align 16 + ret void +} + +; CHECK-LABEL: @test +; CHECK: xxspltd +; CHECK: lxvd2x +; CHECK: xvadddp +; CHECK: stxvd2x +; CHECK-NOT: xxswapd