Synthesize SSE3/AVX 128-bit horizontal add/sub instructions from floating-point add/sub of appropriate shuffle vectors. Does not synthesize the 256-bit AVX versions because they work differently.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140332 91177308-0d34-0410-b5e6-96231b3b80d8
Commit 17470bee5f (parent d102a03b36)
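To make the transformation concrete, here is a minimal LLVM IR sketch of the pattern the new combine recognizes (it mirrors the haddps1 test added below; the function name @hadd_example is illustrative only): an fadd whose operands are the even-element and odd-element shuffles of the same two vectors becomes an X86ISD::FHADD node and is selected as haddps (or vhaddps with AVX).

define <4 x float> @hadd_example(<4 x float> %x, <4 x float> %y) {
  ; even elements: <x0, x2, y0, y2>
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; odd elements:  <x1, x3, y1, y3>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; %a + %b = <x0+x1, x2+x3, y0+y1, y2+y3>, which is exactly haddps %x, %y
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}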
@@ -1137,6 +1137,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::FADD);
+  setTargetDAGCombine(ISD::FSUB);
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::LOAD);
   setTargetDAGCombine(ISD::STORE);
@@ -10647,6 +10649,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FMIN:       return "X86ISD::FMIN";
   case X86ISD::FRSQRT:     return "X86ISD::FRSQRT";
   case X86ISD::FRCP:       return "X86ISD::FRCP";
+  case X86ISD::FHADD:      return "X86ISD::FHADD";
+  case X86ISD::FHSUB:      return "X86ISD::FHSUB";
   case X86ISD::TLSADDR:    return "X86ISD::TLSADDR";
   case X86ISD::TLSCALL:    return "X86ISD::TLSCALL";
   case X86ISD::EH_RETURN:  return "X86ISD::EH_RETURN";
@@ -13738,6 +13742,150 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// isHorizontalBinOp - Return 'true' if this vector operation is "horizontal"
+/// and return the operands for the horizontal operation in LHS and RHS.  A
+/// horizontal operation performs the binary operation on successive elements
+/// of its first operand, then on successive elements of its second operand,
+/// returning the resulting values in a vector.  For example, if
+///   A = < float a0, float a1, float a2, float a3 >
+/// and
+///   B = < float b0, float b1, float b2, float b3 >
+/// then the result of doing a horizontal operation on A and B is
+///   A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >.
+/// In short, LHS and RHS are inspected to see if LHS op RHS is of the form
+/// A horizontal-op B, for some already available A and B, and if so then LHS
+/// is set to A, RHS to B, and the routine returns 'true'.
+/// Note that the binary operation should have the property that if one of the
+/// operands is UNDEF then the result is UNDEF.
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+  // Look for the following pattern: if
+  //   A = < float a0, float a1, float a2, float a3 >
+  //   B = < float b0, float b1, float b2, float b3 >
+  // and
+  //   LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6>
+  //   RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7>
+  // then LHS op RHS = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >
+  // which is A horizontal-op B.
+
+  // At least one of the operands should be a vector shuffle.
+  if (LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
+      RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
+    return false;
+
+  EVT VT = LHS.getValueType();
+  unsigned N = VT.getVectorNumElements();
+
+  // View LHS in the form
+  //   LHS = VECTOR_SHUFFLE A, B, LMask
+  // If LHS is not a shuffle then pretend it is the shuffle
+  //   LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1>
+  // NOTE: in what follows a default initialized SDValue represents an UNDEF of
+  // type VT.
+  SDValue A, B;
+  SmallVector<int, 8> LMask(N);
+  if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+    if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
+      A = LHS.getOperand(0);
+    if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
+      B = LHS.getOperand(1);
+    cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask);
+  } else {
+    if (LHS.getOpcode() != ISD::UNDEF)
+      A = LHS;
+    for (unsigned i = 0; i != N; ++i)
+      LMask[i] = i;
+  }
+
+  // Likewise, view RHS in the form
+  //   RHS = VECTOR_SHUFFLE C, D, RMask
+  SDValue C, D;
+  SmallVector<int, 8> RMask(N);
+  if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
+    if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
+      C = RHS.getOperand(0);
+    if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
+      D = RHS.getOperand(1);
+    cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask);
+  } else {
+    if (RHS.getOpcode() != ISD::UNDEF)
+      C = RHS;
+    for (unsigned i = 0; i != N; ++i)
+      RMask[i] = i;
+  }
+
+  // Check that the shuffles are both shuffling the same vectors.
+  if (!(A == C && B == D) && !(A == D && B == C))
+    return false;
+
+  // If everything is UNDEF then bail out: it would be better to fold to UNDEF.
+  if (!A.getNode() && !B.getNode())
+    return false;
+
+  // If A and B occur in reverse order in RHS, then "swap" them (which means
+  // rewriting the mask).
+  if (A != C)
+    for (unsigned i = 0; i != N; ++i) {
+      unsigned Idx = RMask[i];
+      if (Idx < N)
+        RMask[i] += N;
+      else if (Idx < 2*N)
+        RMask[i] -= N;
+    }
+
+  // At this point LHS and RHS are equivalent to
+  //   LHS = VECTOR_SHUFFLE A, B, LMask
+  //   RHS = VECTOR_SHUFFLE A, B, RMask
+  // Check that the masks correspond to performing a horizontal operation.
+  for (unsigned i = 0; i != N; ++i) {
+    unsigned LIdx = LMask[i], RIdx = RMask[i];
+
+    // Ignore any UNDEF components.
+    if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
+        || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+      continue;
+
+    // Check that successive elements are being operated on.  If not, this is
+    // not a horizontal operation.
+    if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
+        !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+      return false;
+  }
+
+  LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
+  RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.
+  return true;
+}
+
+/// PerformFADDCombine - Do target-specific dag combines on floating point adds.
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Try to synthesize horizontal adds from adds of shuffles.
+  if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+      (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+      isHorizontalBinOp(LHS, RHS, true))
+    return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
+  return SDValue();
+}
+
+/// PerformFSUBCombine - Do target-specific dag combines on floating point subs.
+static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Try to synthesize horizontal subs from subs of shuffles.
+  if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+      (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+      isHorizontalBinOp(LHS, RHS, false))
+    return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
+  return SDValue();
+}
+
 /// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
 /// X86ISD::FXOR nodes.
 static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
@@ -13975,6 +14123,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::LOAD:        return PerformLOADCombine(N, DAG, Subtarget);
   case ISD::STORE:       return PerformSTORECombine(N, DAG, Subtarget);
   case ISD::SINT_TO_FP:  return PerformSINT_TO_FPCombine(N, DAG, this);
+  case ISD::FADD:        return PerformFADDCombine(N, DAG, Subtarget);
+  case ISD::FSUB:        return PerformFSUBCombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
   case X86ISD::FOR:      return PerformFORCombine(N, DAG);
   case X86ISD::FAND:     return PerformFANDCombine(N, DAG);
@@ -178,6 +178,12 @@ namespace llvm {
       /// BLEND family of opcodes
       BLENDV,
 
+      /// FHADD - Floating point horizontal add.
+      FHADD,
+
+      /// FHSUB - Floating point horizontal sub.
+      FHSUB,
+
       /// FMAX, FMIN - Floating point max and min.
       ///
       FMAX, FMIN,
@@ -39,6 +39,8 @@ def X86frsqrt  : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
 def X86frcp    : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
 def X86fsrl    : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
 def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86", SDTFPToIntOp>;
+def X86fhadd   : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
+def X86fhsub   : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
 def X86comi    : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
 def X86ucomi   : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
 def X86cmpss   : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
@@ -4714,62 +4714,122 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
 
 // Horizontal ops
 multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
-                   X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+                   X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
   def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
 
   def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+       [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
 }
 multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
-                  X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+                  X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
   def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
 
   def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-       [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+       [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
 }
 
 let Predicates = [HasAVX] in {
   defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
-                          int_x86_sse3_hadd_ps, 0>, VEX_4V;
+                          X86fhadd, 0>, VEX_4V;
   defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
-                          int_x86_sse3_hadd_pd, 0>, VEX_4V;
+                          X86fhadd, 0>, VEX_4V;
   defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
-                          int_x86_sse3_hsub_ps, 0>, VEX_4V;
+                          X86fhsub, 0>, VEX_4V;
   defm VHSUBPD  : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
-                          int_x86_sse3_hsub_pd, 0>, VEX_4V;
+                          X86fhsub, 0>, VEX_4V;
   defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
-                          int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+                          X86fhadd, 0>, VEX_4V;
   defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
-                          int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+                          X86fhadd, 0>, VEX_4V;
   defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
-                          int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+                          X86fhsub, 0>, VEX_4V;
   defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
-                          int_x86_avx_hsub_pd_256, 0>, VEX_4V;
+                          X86fhsub, 0>, VEX_4V;
 }
 
+let Predicates = [HasAVX] in {
+  def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), VR128:$src2),
+            (VHADDPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), (memop addr:$src2)),
+            (VHADDPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), VR128:$src2),
+            (VHADDPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), (memop addr:$src2)),
+            (VHADDPDrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), VR128:$src2),
+            (VHSUBPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), (memop addr:$src2)),
+            (VHSUBPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), VR128:$src2),
+            (VHSUBPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), (memop addr:$src2)),
+            (VHSUBPDrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_avx_hadd_ps_256 (v8f32 VR256:$src1), VR256:$src2),
+            (VHADDPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(int_x86_avx_hadd_ps_256 (v8f32 VR256:$src1), (memop addr:$src2)),
+            (VHADDPSYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_avx_hadd_pd_256 (v4f64 VR256:$src1), VR256:$src2),
+            (VHADDPDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(int_x86_avx_hadd_pd_256 (v4f64 VR256:$src1), (memop addr:$src2)),
+            (VHADDPDYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_avx_hsub_ps_256 (v8f32 VR256:$src1), VR256:$src2),
+            (VHSUBPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(int_x86_avx_hsub_ps_256 (v8f32 VR256:$src1), (memop addr:$src2)),
+            (VHSUBPSYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_avx_hsub_pd_256 (v4f64 VR256:$src1), VR256:$src2),
+            (VHSUBPDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(int_x86_avx_hsub_pd_256 (v4f64 VR256:$src1), (memop addr:$src2)),
+            (VHSUBPDYrm VR256:$src1, addr:$src2)>;
+}
+
 let Constraints = "$src1 = $dst" in {
-  defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
-                        int_x86_sse3_hadd_ps>;
-  defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
-                       int_x86_sse3_hadd_pd>;
-  defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
-                        int_x86_sse3_hsub_ps>;
-  defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
-                       int_x86_sse3_hsub_pd>;
+  defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+  defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+  defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+  defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
 }
 
+let Predicates = [HasSSE3] in {
+  def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), VR128:$src2),
+            (HADDPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hadd_ps (v4f32 VR128:$src1), (memop addr:$src2)),
+            (HADDPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), VR128:$src2),
+            (HADDPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hadd_pd (v2f64 VR128:$src1), (memop addr:$src2)),
+            (HADDPDrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), VR128:$src2),
+            (HSUBPSrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hsub_ps (v4f32 VR128:$src1), (memop addr:$src2)),
+            (HSUBPSrm VR128:$src1, addr:$src2)>;
+
+  def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), VR128:$src2),
+            (HSUBPDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(int_x86_sse3_hsub_pd (v2f64 VR128:$src1), (memop addr:$src2)),
+            (HSUBPDrm VR128:$src1, addr:$src2)>;
+}
+
 //===---------------------------------------------------------------------===//
test/CodeGen/X86/haddsub.ll (new file, 194 lines)
@@ -0,0 +1,194 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse3,-avx | FileCheck %s -check-prefix=SSE3
; RUN: llc < %s -march=x86-64 -mattr=-sse3,+avx | FileCheck %s -check-prefix=AVX

; SSE3: haddpd1:
; SSE3-NOT: vhaddpd
; SSE3: haddpd
; AVX: haddpd1:
; AVX: vhaddpd
define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; SSE3: haddpd2:
; SSE3-NOT: vhaddpd
; SSE3: haddpd
; AVX: haddpd2:
; AVX: vhaddpd
define <2 x double> @haddpd2(<2 x double> %x, <2 x double> %y) {
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 2>
  %b = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 2, i32 1>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; SSE3: haddpd3:
; SSE3-NOT: vhaddpd
; SSE3: haddpd
; AVX: haddpd3:
; AVX: vhaddpd
define <2 x double> @haddpd3(<2 x double> %x) {
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fadd <2 x double> %a, %b
  ret <2 x double> %r
}

; SSE3: haddps1:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps1:
; AVX: vhaddps
define <4 x float> @haddps1(<4 x float> %x, <4 x float> %y) {
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: haddps2:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps2:
; AVX: vhaddps
define <4 x float> @haddps2(<4 x float> %x, <4 x float> %y) {
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
  %b = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: haddps3:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps3:
; AVX: vhaddps
define <4 x float> @haddps3(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: haddps4:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps4:
; AVX: vhaddps
define <4 x float> @haddps4(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: haddps5:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps5:
; AVX: vhaddps
define <4 x float> @haddps5(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: haddps6:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps6:
; AVX: vhaddps
define <4 x float> @haddps6(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: haddps7:
; SSE3-NOT: vhaddps
; SSE3: haddps
; AVX: haddps7:
; AVX: vhaddps
define <4 x float> @haddps7(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
  %r = fadd <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: hsubpd1:
; SSE3-NOT: vhsubpd
; SSE3: hsubpd
; AVX: hsubpd1:
; AVX: vhsubpd
define <2 x double> @hsubpd1(<2 x double> %x, <2 x double> %y) {
  %a = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 0, i32 2>
  %b = shufflevector <2 x double> %x, <2 x double> %y, <2 x i32> <i32 1, i32 3>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}

; SSE3: hsubpd2:
; SSE3-NOT: vhsubpd
; SSE3: hsubpd
; AVX: hsubpd2:
; AVX: vhsubpd
define <2 x double> @hsubpd2(<2 x double> %x) {
  %a = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
  %b = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  %r = fsub <2 x double> %a, %b
  ret <2 x double> %r
}

; SSE3: hsubps1:
; SSE3-NOT: vhsubps
; SSE3: hsubps
; AVX: hsubps1:
; AVX: vhsubps
define <4 x float> @hsubps1(<4 x float> %x, <4 x float> %y) {
  %a = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: hsubps2:
; SSE3-NOT: vhsubps
; SSE3: hsubps
; AVX: hsubps2:
; AVX: vhsubps
define <4 x float> @hsubps2(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: hsubps3:
; SSE3-NOT: vhsubps
; SSE3: hsubps
; AVX: hsubps3:
; AVX: vhsubps
define <4 x float> @hsubps3(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}

; SSE3: hsubps4:
; SSE3-NOT: vhsubps
; SSE3: hsubps
; AVX: hsubps4:
; AVX: vhsubps
define <4 x float> @hsubps4(<4 x float> %x) {
  %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %b = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %r = fsub <4 x float> %a, %b
  ret <4 x float> %r
}