diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5c133f12e44..6aec21e3a91 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1169,7 +1169,6 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       Parent->getOpcode() != ISD::PREFETCH &&
       Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
       Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores.
-      Parent->getOpcode() != X86ISD::VZEXT_LOAD &&
       Parent->getOpcode() != X86ISD::FLD &&
       Parent->getOpcode() != X86ISD::FILD &&
       Parent->getOpcode() != X86ISD::FILD_FLAG &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 3f7f272b1bd..dce154cdd17 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4113,7 +4113,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
 /// There's even a handy isZeroNode for that purpose.
 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
-                                        DebugLoc &dl, SelectionDAG &DAG) {
+                                        DebugLoc &DL, SelectionDAG &DAG) {
   EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = Elts.size();
 
@@ -4150,18 +4150,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
   // consecutive loads for the low half, generate a vzext_load node.
   if (LastLoadedElt == NumElems - 1) {
     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
-      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+      return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                          LDBase->getPointerInfo(),
                          LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
-    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+    return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                        LDBase->getPointerInfo(),
                        LDBase->isVolatile(), LDBase->isNonTemporal(),
                        LDBase->getAlignment());
   } else if (NumElems == 4 && LastLoadedElt == 1) {
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
-    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+    SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+                                              Ops, 2, MVT::i32,
+                                              LDBase->getMemOperand());
+    return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode);
   }
   return SDValue();
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 870cf74e97a..b8770c00d57 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -220,9 +220,6 @@ namespace llvm {
       // VZEXT_MOVL - Vector move low and zero extend.
       VZEXT_MOVL,
 
-      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
-      VZEXT_LOAD,
-
       // VSHL, VSRL - Vector logical left / right shift.
       VSHL, VSRL,
 
@@ -309,8 +306,11 @@ namespace llvm {
 
       // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
      LCMPXCHG_DAG,
-      LCMPXCHG8_DAG
+      LCMPXCHG8_DAG,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
       // thought as target memory ops!
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index f75e35c8e8d..104f880032d 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -102,7 +102,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
 def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                        [SDNPHasChain, SDNPMayLoad]>;
+                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86vshl    : SDNode<"X86ISD::VSHL",      SDTIntShiftOp>;
 def X86vshr    : SDNode<"X86ISD::VSRL",      SDTIntShiftOp>;
 def X86cmpps   : SDNode<"X86ISD::CMPPS",     SDTX86VFCMP>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 5ac89d65e31..bf8eb1b7c9b 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -111,10 +111,10 @@ def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
 
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
                         [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index 3a249470d46..31c42af1b18 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
 
 define i32 @test1() nounwind readonly {
 entry:
@@ -31,3 +31,27 @@ entry:
 
 ; X64: test2:
 ; X64: callq *%gs:(%rdi)
+
+
+
+
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+  %0 = load i64 addrspace(256)* %p
+  %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+  %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+  %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+  %3 = bitcast <4 x i32> %2 to <2 x i64>
+  ret <2 x i64> %3
+
+; X32: pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd %gs:(%eax), %xmm0
+; X32: ret
+
+; X64: pmovsxwd_1:
+; X64: pmovsxwd %gs:(%rdi), %xmm0
+; X64: ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index d9de892933e..061e33feaa5 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
+; CHECK: je
 
 ; widening select v6i32 and then a sub
 