X86: select (V)MOVNTPS for aligned non-temporal stores of v4i32.

Adds two selection patterns inside the existing AddedComplexity block of
X86InstrSSE.td that map `alignednontemporalstore (v4i32 VR128:$src)` to
VMOVNTPSmr (under Predicates = [HasAVX, NoVLX]) and to MOVNTPSmr, plus a
FileCheck test covering <4 x float>, <4 x i32> and <2 x double> stores
tagged with !nontemporal for both -mcpu=corei7 and -mcpu=corei7-avx.

NOTE(review): the MOVNTPSmr pattern carries no Predicates guard, unlike the
VMOVNTPSmr pattern directly above it -- confirm whether it should be
restricted to non-AVX subtargets.
NOTE(review): test3 expects movntps (not movntpd) for <2 x double>;
presumably intentional for a zeroinitializer store -- confirm.
NOTE(review): leading whitespace inside the hunks was reconstructed; if the
context lines do not match the target tree, apply with --ignore-whitespace.

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 4b283a2988a..3874c1968b5 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3939,6 +3939,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
                      PS, Requires<[HasSSE2]>;
 } // SchedRW = [WriteStore]
 
+let Predicates = [HasAVX, NoVLX] in {
+  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+            (VMOVNTPSmr addr:$dst, VR128:$src)>;
+}
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+          (MOVNTPSmr addr:$dst, VR128:$src)>;
+
 } // AddedComplexity
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
new file mode 100644
index 00000000000..9d0cb9a5edf
--- /dev/null
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+; Make sure that we generate non-temporal stores for the test cases below.
+
+define void @test1(<4 x float>* %dst) {
+; CHECK-LABEL: test1:
+; SSE: movntps
+; AVX: vmovntps
+  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test2(<4 x i32>* %dst) {
+; CHECK-LABEL: test2:
+; SSE: movntps
+; AVX: vmovntps
+  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test3(<2 x double>* %dst) {
+; CHECK-LABEL: test3:
+; SSE: movntps
+; AVX: vmovntps
+  store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+!1 = metadata !{i32 1}