From 0f0e0a0e58d00ce75d14bf09a152b1469cbfac27 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Tue, 9 Aug 2011 00:46:57 +0000
Subject: [PATCH] Make LowerVSETCC aware of AVX types and add patterns to match them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137090 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 13 ++++++++++---
 lib/Target/X86/X86InstrSSE.td | 25 +++++++++++++++++++++++++
 test/CodeGen/X86/avx-256-cmp.ll | 18 ++++++++++++++++++
 3 files changed, 53 insertions(+), 3 deletions(-)
 create mode 100644 test/CodeGen/X86/avx-256-cmp.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b2329470aba..71b23e2ccb0 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -989,6 +989,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SRA, MVT::v8i32, Custom);
     setOperationAction(ISD::SRA, MVT::v16i16, Custom);
 
+    setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
+    setOperationAction(ISD::VSETCC, MVT::v4i64, Custom);
+
     // Custom lower several nodes for 256-bit types.
     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
          i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -7912,9 +7915,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
 
   if (isFP) {
     unsigned SSECC = 8;
-    EVT VT0 = Op0.getValueType();
-    assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
-    unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+    EVT EltVT = Op0.getValueType().getVectorElementType();
+    assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+
+    unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
     bool Swap = false;
 
     switch (SetCCOpcode) {
@@ -7961,6 +7965,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
   }
 
+  if (!isFP && VT.getSizeInBits() == 256)
+    return SDValue();
+
   // We are handling one of the integer comparisons here. Since SSE only has
   // GT and EQ comparisons for integer, swapping operands and multiple
   // operations may be required for some comparisons.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index acd0c6b7a65..79ae90e4b5c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1264,14 +1264,39 @@ let Constraints = "$src1 = $dst" in {
                  SSEPackedDouble>, TB, OpSize;
 }
 
+let Predicates = [HasSSE1] in {
 def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
 def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
           (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasSSE2] in {
 def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
           (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
 def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
           (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
+def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
+def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
+}
 
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Shuffle Instructions
diff --git a/test/CodeGen/X86/avx-256-cmp.ll b/test/CodeGen/X86/avx-256-cmp.ll
new file mode 100644
index 00000000000..3323a5bd9cc
--- /dev/null
+++ b/test/CodeGen/X86/avx-256-cmp.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vcmpltps %ymm
+; CHECK-NOT: vucomiss
+define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone {
+  %bincmp = fcmp olt <8 x float> %a, %b
+  %s = sext <8 x i1> %bincmp to <8 x i32>
+  ret <8 x i32> %s
+}
+
+; CHECK: vcmpltpd %ymm
+; CHECK-NOT: vucomisd
+define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone {
+  %bincmp = fcmp olt <4 x double> %a, %b
+  %s = sext <4 x i1> %bincmp to <4 x i64>
+  ret <4 x i64> %s
+}
+