From 1db4b4f5c4229d69ca7a3125d59cb20676795858 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 16 Jan 2006 17:53:00 +0000 Subject: [PATCH] transfer some notes from my email to somewhere useful. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25361 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/README.txt | 26 ++++++++++++++++++++ lib/Target/X86/README.txt | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 84536ef2deb..ffae6110a86 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -225,3 +225,29 @@ struct foo { double X, Y; }; void xxx(struct foo F); void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); } +===-------------------------------------------------------------------------=== + +For this: + +int h(int i, int j, int k) { + return (i==0||j==0||k == 0); +} + +We currently emit this: + +_h: + cntlzw r2, r3 + cntlzw r3, r4 + cntlzw r4, r5 + srwi r2, r2, 5 + srwi r3, r3, 5 + srwi r4, r4, 5 + or r2, r3, r2 + or r3, r2, r4 + blr + +The ctlz/shift instructions are created by the isel, so the dag combiner doesn't +have a chance to pull the shifts through the or's (eliminating two +instructions). SETCC nodes should be custom lowered in this case, not expanded +by the isel. + diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 4869c5e956d..cb206f3b04e 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -54,6 +54,10 @@ fxch -> fucomi fucomi jl X jg X +Ideas: +http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html + + //===---------------------------------------------------------------------===// Improvements to the multiply -> shift/add algorithm: @@ -121,3 +125,44 @@ Model X86 EFLAGS as a real register to avoid redundant cmp / test. e.g.
setg %al testb %al, %al # unnecessary jne .BB7 + +//===---------------------------------------------------------------------===// + +Count leading zeros and count trailing zeros: + +int clz(int X) { return __builtin_clz(X); } +int ctz(int X) { return __builtin_ctz(X); } + +$ gcc t.c -S -o - -O3 -fomit-frame-pointer -masm=intel +clz: + bsr %eax, DWORD PTR [%esp+4] + xor %eax, 31 + ret +ctz: + bsf %eax, DWORD PTR [%esp+4] + ret + +However, check that these are defined for 0 and 32. Our intrinsics are; GCC's +aren't. + +//===---------------------------------------------------------------------===// + +Use push/pop instructions in prolog/epilog sequences instead of stores off +ESP (certain code size win, perf win on some [which?] processors). + +//===---------------------------------------------------------------------===// + +Only use inc/neg/not instructions on processors where they are faster than +add/sub/xor. They are slower on the P4 due to only updating some processor +flags. + +//===---------------------------------------------------------------------===// + +Open code rint, floor, ceil, trunc: +http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html +http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html + +//===---------------------------------------------------------------------===// + +Combine: a = sin(x), b = cos(x) into a,b = sincos(x). +