diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 8d9eabad0be..b98f5fbb995 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2,11 +2,6 @@
 // Random ideas for the X86 backend.
 //===---------------------------------------------------------------------===//
 
-We should add support for the "movbe" instruction, which does a byte-swapping
-copy (3-addr bswap + memory support?) This is available on Atom processors.
-
-//===---------------------------------------------------------------------===//
-
 This should be one DIV/IDIV instruction, not a libcall:
 
 unsigned test(unsigned long long X, unsigned Y) {
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index b7951199e8a..4505dafcbfe 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -133,7 +133,7 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
                                FeatureSlowBTMem]>;
 def : Proc<"penryn",          [FeatureSSE41, FeatureCMPXCHG16B,
                                FeatureSlowBTMem]>;
-def : Proc<"atom",            [FeatureSSE3, FeatureCMPXCHG16B,
+def : Proc<"atom",            [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
                                FeatureSlowBTMem]>;
 // "Arrandale" along with corei3 and corei5
 def : Proc<"corei7",          [FeatureSSE42, FeatureCMPXCHG16B,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index efae5fbbd42..2640a90cb77 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1308,17 +1308,25 @@ def ARPL16mr : I<0x63, MRMSrcMem, (outs GR16:$src), (ins i16mem:$dst),
 //
 let Predicates = [HasMOVBE] in {
+  // Load forms: match a bswap applied to a value loaded from memory.
   def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
-                    "movbe{w}\t{$src, $dst|$dst, $src}", []>, OpSize, T8;
+                    "movbe{w}\t{$src, $dst|$dst, $src}",
+                    [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>, OpSize, T8;
   def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
-                    "movbe{l}\t{$src, $dst|$dst, $src}", []>, T8;
+                    "movbe{l}\t{$src, $dst|$dst, $src}",
+                    [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>, T8;
   def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
-                     "movbe{q}\t{$src, $dst|$dst, $src}", []>, T8;
+                     "movbe{q}\t{$src, $dst|$dst, $src}",
+                     [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>, T8;
+  // Store forms: match a store of a bswap applied to a register value.
   def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
-                    "movbe{w}\t{$src, $dst|$dst, $src}", []>, OpSize, T8;
+                    "movbe{w}\t{$src, $dst|$dst, $src}",
+                    [(store (bswap GR16:$src), addr:$dst)]>, OpSize, T8;
   def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                    "movbe{l}\t{$src, $dst|$dst, $src}", []>, T8;
+                    "movbe{l}\t{$src, $dst|$dst, $src}",
+                    [(store (bswap GR32:$src), addr:$dst)]>, T8;
   def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                     "movbe{q}\t{$src, $dst|$dst, $src}", []>, T8;
+                     "movbe{q}\t{$src, $dst|$dst, $src}",
+                     [(store (bswap GR64:$src), addr:$dst)]>, T8;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/movbe.ll b/test/CodeGen/X86/movbe.ll
new file mode 100644
index 00000000000..0ed84693d58
--- /dev/null
+++ b/test/CodeGen/X86/movbe.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
+
+; A bswap combined with a load or a store should be selected as a single
+; movbe instruction when compiling for atom, which enables FeatureMOVBE.
+; The triple is explicit because the CHECK lines name specific argument
+; registers (%rdi, %esi, %rsi), which depend on the target's calling convention.
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
+define void @test1(i32* nocapture %x, i32 %y) nounwind {
+  %bswap = call i32 @llvm.bswap.i32(i32 %y)
+  store i32 %bswap, i32* %x, align 4
+  ret void
+; CHECK: test1:
+; CHECK: movbel %esi, (%rdi)
+}
+
+define i32 @test2(i32* %x) nounwind {
+  %load = load i32* %x, align 4
+  %bswap = call i32 @llvm.bswap.i32(i32 %load)
+  ret i32 %bswap
+; CHECK: test2:
+; CHECK: movbel (%rdi), %eax
+}
+
+define void @test3(i64* %x, i64 %y) nounwind {
+  %bswap = call i64 @llvm.bswap.i64(i64 %y)
+  store i64 %bswap, i64* %x, align 8
+  ret void
+; CHECK: test3:
+; CHECK: movbeq %rsi, (%rdi)
+}
+
+define i64 @test4(i64* %x) nounwind {
+  %load = load i64* %x, align 8
+  %bswap = call i64 @llvm.bswap.i64(i64 %load)
+  ret i64 %bswap
+; CHECK: test4:
+; CHECK: movbeq (%rdi), %rax
+}