diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 668de242827..be7ba93df08 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3234,17 +3234,12 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>, VEX; - def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)]>, VEX; let ExeDomain = SSEPackedInt in def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), + [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>, VEX; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), @@ -3260,16 +3255,11 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)]>, VEX; - def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)]>, VEX; let ExeDomain = SSEPackedInt in def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), + [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)]>, VEX; } @@ -3288,14 +3278,10 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntpd\t{$src, $dst|$dst, $src}", [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; -def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; - let ExeDomain = SSEPackedInt in def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>; def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll new file mode 100644 index 00000000000..0768aae48e8 --- /dev/null +++ b/test/CodeGen/X86/avx2-nontemporal.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s + +define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) { +; CHECK: vmovntps + %cast = bitcast i8* %B to <8 x float>* + %A2 = fadd <8 x float> %A, + store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0 +; CHECK: vmovntdq + %cast1 = bitcast i8* %B to <4 x i64>* + %E2 = add <4 x i64> %E, + store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0 +; CHECK: vmovntpd + %cast2 = bitcast i8* %B to <4 x double>* + %C2 = fadd <4 x double> %C, + store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0 +; CHECK: movnti + %cast3 = bitcast i8* %B to i32* + store i32 %D, i32* %cast3, align 16, !nontemporal !0 + ret void +} + +!0 = metadata !{i32 1}