diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 58fd1f3f8da..2da9b4baeb7 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -339,10 +339,6 @@ class VPSI o, Format F, dag outs, dag ins, string asm, list pattern> : I, TB, Requires<[HasAVX]>; -class VoPSI o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, TB, - Requires<[HasXMM]>; // SSE2 Instruction Templates: // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0bc3afa77bf..fe83ae9ddd7 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -475,6 +475,7 @@ def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasXMM : Predicate<"Subtarget->hasXMM()">; def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; +def HasSSE3orAVX : Predicate<"Subtarget->hasSSE3orAVX()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 78143f6977e..d6ae3af0d4e 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3253,19 +3253,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), //===----------------------------------------------------------------------===// // Prefetch intrinsic. 
-def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>; -def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>; -def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>; -def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>; +let Predicates = [HasXMM] in { +def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>, TB; +def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src), + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>, TB; +def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src), + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>, TB; +def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>, TB; +} // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - TB, Requires<[HasSSE2]>; + TB, Requires<[HasXMMInt]>; // Pause. This "instruction" is encoded as "rep; nop", so even though it // was introduced with SSE2, it's backward compatible. 
@@ -3273,11 +3275,11 @@ def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; // Load, store, and memory fence def SFENCE : I<0xAE, MRM_F8, (outs), (ins), - "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>; + "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasXMM]>; def LFENCE : I<0xAE, MRM_E8, (outs), (ins), - "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; + "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasXMMInt]>; def MFENCE : I<0xAE, MRM_F0, (outs), (ins), - "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; + "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasXMMInt]>; def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -5463,17 +5465,19 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), - [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>; + [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, + Requires<[HasSSE3orAVX]>; def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), - [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>; + [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, + Requires<[HasSSE3orAVX]>; } let Uses = [EAX, ECX, EDX] in def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB, - Requires<[HasSSE3]>; + Requires<[HasSSE3orAVX]>; let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB, - Requires<[HasSSE3]>; + Requires<[HasSSE3orAVX]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll index b514cf6427d..aaedf18481b 100644 --- a/test/CodeGen/X86/apm.ll +++ b/test/CodeGen/X86/apm.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s 
-mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64 ; PR8573 ; CHECK: foo: diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index f58391469ce..b4f04ceb8f7 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -2481,4 +2481,52 @@ define void @test_x86_avx_vzeroupper() { } declare void @llvm.x86.avx.vzeroupper() nounwind +; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work +; CHECK: monitor +define void @monitor(i8* %P, i32 %E, i32 %H) nounwind { +entry: + tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) + ret void +} +declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind + +; CHECK: mwait +define void @mwait(i32 %E, i32 %H) nounwind { +entry: + tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) + ret void +} +declare void @llvm.x86.sse3.mwait(i32, i32) nounwind + +; CHECK: sfence +define void @sfence() nounwind { +entry: + tail call void @llvm.x86.sse.sfence() + ret void +} +declare void @llvm.x86.sse.sfence() nounwind + +; CHECK: lfence +define void @lfence() nounwind { +entry: + tail call void @llvm.x86.sse2.lfence() + ret void +} +declare void @llvm.x86.sse2.lfence() nounwind + +; CHECK: mfence +define void @mfence() nounwind { +entry: + tail call void @llvm.x86.sse2.mfence() + ret void +} +declare void @llvm.x86.sse2.mfence() nounwind + +; CHECK: clflush +define void @clflush(i8* %p) nounwind { +entry: + tail call void @llvm.x86.sse2.clflush(i8* %p) + ret void +} +declare void @llvm.x86.sse2.clflush(i8*) nounwind