diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a9a8c885761..2e4fa32b7fa 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -84,8 +84,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); - if (ElemTy == MVT::i8 || ElemTy == MVT::i16) - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); if (ElemTy != MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); @@ -3777,14 +3776,19 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - SDValue Vec = Op.getOperand(0); + // EXTRACT_VECTOR_ELT is legal only for immediate indexes. SDValue Lane = Op.getOperand(1); - assert(VT == MVT::i32 && - Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && - "unexpected type for custom-lowering vector extract"); - return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + if (!isa(Lane)) + return SDValue(); + + SDValue Vec = Op.getOperand(0); + if (Op.getValueType() == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + } + + return Op; } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -4923,7 +4927,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, if (VT == MVT::i32 && (EltVT == MVT::i8 || EltVT == MVT::i16) && - TLI.isTypeLegal(Vec.getValueType())) { + TLI.isTypeLegal(Vec.getValueType()) && + isa(Lane)) { unsigned Opc = 0; switch (N->getOpcode()) { diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index 0fd184388da..512dc5a67fb 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -210,3 +210,20 @@ entry: %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1] ret <2 x float> %0 } + +; The llvm extractelement instruction does not require that the lane number +; be an immediate constant. Make sure a variable lane number is handled. + +define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind { + %tmp1 = load <8 x i8>* %A + %tmp2 = extractelement <8 x i8> %tmp1, i32 %B + %tmp3 = sext i8 %tmp2 to i32 + ret i32 %tmp3 +} + +define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind { + %tmp1 = load <4 x i32>* %A + %tmp2 = add <4 x i32> %tmp1, %tmp1 + %tmp3 = extractelement <4 x i32> %tmp2, i32 %B + ret i32 %tmp3 +}