diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d8eaee71c89..69acf1a0181 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3833,11 +3833,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op, } else if (VT == MVT::f32) { // EXTRACTPS outputs to a GPR32 register which will require a movd to copy // the result back to FR32 register. It's only worth matching if the - // result has a single use which is a store. + // result has a single use which is a store or a bitcast to i32. if (!Op.hasOneUse()) return SDOperand(); SDNode *User = Op.Val->use_begin()->getUser(); - if (User->getOpcode() != ISD::STORE) + if (User->getOpcode() != ISD::STORE && + (User->getOpcode() != ISD::BIT_CONVERT || + User->getValueType(0) != MVT::i32)) return SDOperand(); SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 442847cda8e..982b0dc6df6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3387,13 +3387,12 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">; /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory /// destination multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { - // Not worth matching to rr form of extractps since the result is in GPR32. 
def rr : SS4AIi8, + [(set GR32:$dst, + (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>, OpSize; def mr : SS4AIi8 %v) { + %s = extractelement <4 x float> %v, i32 3 + %i = bitcast float %s to i32 + ret i32 %i +} +define i32 @boo(<4 x float> %v) { + %t = bitcast <4 x float> %v to <4 x i32> + %s = extractelement <4 x i32> %t, i32 3 + ret i32 %s +} diff --git a/test/CodeGen/X86/sse41-extractps-bitcast-1.ll b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll new file mode 100644 index 00000000000..fc0df060623 --- /dev/null +++ b/test/CodeGen/X86/sse41-extractps-bitcast-1.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep extractps + +; The non-store form of extractps puts its result into a GPR. +; This makes it suitable for an extract from a <4 x float> that +; is bitcasted to i32, but unsuitable for much of anything else. + +define float @bar(<4 x float> %v) { + %s = extractelement <4 x float> %v, i32 3 + %t = add float %s, 1.0 + ret float %t +} +define float @baz(<4 x float> %v) { + %s = extractelement <4 x float> %v, i32 3 + ret float %s +} +define i32 @qux(<4 x i32> %v) { + %i = extractelement <4 x i32> %v, i32 3 + ret i32 %i +}