Make LowerVSETCC aware of AVX types and add patterns to match them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bruno Cardoso Lopes 2011-08-09 00:46:57 +00:00
parent 8e0cca6945
commit 0f0e0a0e58
3 changed files with 53 additions and 3 deletions

View File

@ -989,6 +989,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
setOperationAction(ISD::VSETCC, MVT::v4i64, Custom);
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@ -7912,9 +7915,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (isFP) {
unsigned SSECC = 8;
EVT VT0 = Op0.getValueType();
assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
EVT EltVT = Op0.getValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
switch (SetCCOpcode) {
@ -7961,6 +7965,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
}
if (!isFP && VT.getSizeInBits() == 256)
return SDValue();
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.

View File

@ -1264,14 +1264,39 @@ let Constraints = "$src1 = $dst" in {
SSEPackedDouble>, TB, OpSize;
}
let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}
let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions

View File

@ -0,0 +1,18 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; CHECK: vcmpltps %ymm
; CHECK-NOT: vucomiss
define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone {
%bincmp = fcmp olt <8 x float> %a, %b
%s = sext <8 x i1> %bincmp to <8 x i32>
ret <8 x i32> %s
}
; CHECK: vcmpltpd %ymm
; CHECK-NOT: vucomisd
define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone {
%bincmp = fcmp olt <4 x double> %a, %b
%s = sext <4 x i1> %bincmp to <4 x i64>
ret <4 x i64> %s
}