From 1db4b4f5c4229d69ca7a3125d59cb20676795858 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Mon, 16 Jan 2006 17:53:00 +0000
Subject: [PATCH] transfer some notes from my email to somewhere useful.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25361 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/README.txt | 26 ++++++++++++++++++++
 lib/Target/X86/README.txt     | 45 +++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 84536ef2deb..ffae6110a86 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -225,3 +225,29 @@ struct foo { double X, Y; };
 void xxx(struct foo F);
 void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
 
+===-------------------------------------------------------------------------===
+
+For this:
+
+int h(int i, int j, int k) {
+ return (i==0||j==0||k == 0);
+}
+
+We currently emit this:
+
+_h:
+        cntlzw r2, r3
+        cntlzw r3, r4
+        cntlzw r4, r5
+        srwi r2, r2, 5
+        srwi r3, r3, 5
+        srwi r4, r4, 5
+        or r2, r3, r2
+        or r3, r2, r4
+        blr
+
+The ctlz/shift instructions are created by the isel, so the dag combiner doesn't
+have a chance to pull the shifts through the or's (eliminating two 
+instructions).  SETCC nodes should be custom lowered in this case, not expanded
+by the isel.
+
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 4869c5e956d..cb206f3b04e 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -54,6 +54,10 @@ fxch	->		fucomi
 fucomi			jl X
 jg X
 
+Ideas:
+http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
+
+
 //===---------------------------------------------------------------------===//
 
 Improvements to the multiply -> shift/add algorithm:
@@ -121,3 +125,44 @@ Model X86 EFLAGS as a real register to avoid redudant cmp / test. e.g.
 	setg %al
 	testb %al, %al  # unnecessary
 	jne .BB7
+
+//===---------------------------------------------------------------------===//
+
+Count leading zeros and count trailing zeros:
+
+int clz(int X) { return __builtin_clz(X); }
+int ctz(int X) { return __builtin_ctz(X); }
+
+$ gcc t.c -S -o - -O3  -fomit-frame-pointer -masm=intel
+clz:
+        bsr     %eax, DWORD PTR [%esp+4]
+        xor     %eax, 31
+        ret
+ctz:
+        bsf     %eax, DWORD PTR [%esp+4]
+        ret
+
+However, check that these are defined for 0 and 32.  Our intrinsics are; GCC's
+aren't.
+
+//===---------------------------------------------------------------------===//
+
+Use push/pop instructions in prolog/epilog sequences instead of stores off 
+ESP (certain code size win, perf win on some [which?] processors).
+
+//===---------------------------------------------------------------------===//
+
+Only use inc/neg/not instructions on processors where they are faster than
+add/sub/xor.  They are slower on the P4 because they update only some of the
+processor flags.
+
+//===---------------------------------------------------------------------===//
+
+Open code rint,floor,ceil,trunc:
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
+http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
+
+//===---------------------------------------------------------------------===//
+
+Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
+