From d9ced092998b5ea3b10ab32b8f2407022b4508db Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Fri, 8 Aug 2008 18:30:21 +0000
Subject: [PATCH] Add an EXTRACTPSmr pattern to match the pattern that X86ISelLowering creates.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54544 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrInfo.cpp |  1 +
 lib/Target/X86/X86InstrSSE.td   |  7 +++++++
 test/CodeGen/X86/extractps.ll   | 16 ++++++++++++++++
 3 files changed, 24 insertions(+)
 create mode 100644 test/CodeGen/X86/extractps.ll

diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a871fa8dc43..d800c9741b9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -238,6 +238,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::DIV32r, X86::DIV32m, 1 },
     { X86::DIV64r, X86::DIV64m, 1 },
     { X86::DIV8r, X86::DIV8m, 1 },
+    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
     { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
     { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
     { X86::IDIV16r, X86::IDIV16m, 1 },
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 856525e462a..963a60584b6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3485,6 +3485,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
 
 defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
 
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+                                              imm:$src2))),
+           addr:$dst),
+          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+         Requires<[HasSSE41]>;
+
 let Constraints = "$src1 = $dst" in {
   multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
     def rr : SS4AIi8:0 [#uses=2]
+
+define internal void @""() nounwind {
+  load float* @0, align 16 ; <float>:1 [#uses=1]
+  insertelement <4 x float> undef, float %1, i32 0 ; <<4 x float>>:2 [#uses=1]
+  call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1]
+  extractelement <4 x float> %3, i32 0 ; <float>:4 [#uses=1]
+  store float %4, float* @0, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+