mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-05 01:31:05 +00:00
Make LowerVSETCC aware of AVX types and add patterns to match them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: parent 8e0cca6945 · commit 0f0e0a0e58
@@ -989,6 +989,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SRA, MVT::v8i32, Custom);
     setOperationAction(ISD::SRA, MVT::v16i16, Custom);

+    setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
+    setOperationAction(ISD::VSETCC, MVT::v4i64, Custom);
+
     // Custom lower several nodes for 256-bit types.
     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
          i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -7912,9 +7915,10 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {

   if (isFP) {
     unsigned SSECC = 8;
-    EVT VT0 = Op0.getValueType();
-    assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
-    unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
+    EVT EltVT = Op0.getValueType().getVectorElementType();
+    assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+
+    unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
     bool Swap = false;

     switch (SetCCOpcode) {
@@ -7961,6 +7965,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
     return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
   }

+  if (!isFP && VT.getSizeInBits() == 256)
+    return SDValue();
+
   // We are handling one of the integer comparisons here. Since SSE only has
   // GT and EQ comparisons for integer, swapping operands and multiple
   // operations may be required for some comparisons.
@@ -1264,14 +1264,39 @@ let Constraints = "$src1 = $dst" in {
(resulting text after the change; per-line add/remove markers were lost in extraction)

                    SSEPackedDouble>, TB, OpSize;
}

let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}

let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
test/CodeGen/X86/avx-256-cmp.ll — new file, 18 lines
@@ -0,0 +1,18 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s

; CHECK: vcmpltps %ymm
; CHECK-NOT: vucomiss
define <8 x i32> @cmp00(<8 x float> %a, <8 x float> %b) nounwind readnone {
  %bincmp = fcmp olt <8 x float> %a, %b
  %s = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %s
}

; CHECK: vcmpltpd %ymm
; CHECK-NOT: vucomisd
define <4 x i64> @cmp01(<4 x double> %a, <4 x double> %b) nounwind readnone {
  %bincmp = fcmp olt <4 x double> %a, %b
  %s = sext <4 x i1> %bincmp to <4 x i64>
  ret <4 x i64> %s
}
Loading…
x
Reference in New Issue
Block a user