diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d9788..3c6138bb1e6 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1868,3 +1868,69 @@ carried over to machine instructions. Asm printer (or JIT) can use this information to add the "lock" prefix. //===---------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; [#uses=1] + %and = and i32 %tmp1, 1 ; [#uses=1] + %tobool = icmp ne i32 %and, 0 ; [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl. + +//===---------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions are +are functionally identical. + +//===---------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. + +//===---------------------------------------------------------------------===//