mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
Split out altivec notes into their own README
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27168 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a0b3afbe14
commit
b86bd2cee2
@ -1,3 +1,5 @@
|
||||
//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
|
||||
|
||||
TODO:
|
||||
* gpr0 allocation
|
||||
* implement do-loop -> bdnz transform
|
||||
@ -309,12 +311,6 @@ Generate lwbrx and other byteswapping load/store instructions when reasonable.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Implement TargetConstantVec, and set up PPC to custom lower ConstantVec into
|
||||
TargetConstantVec's if it's one of the many forms that are algorithmically
|
||||
computable using the spiffy altivec instructions.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Compile this:
|
||||
|
||||
int foo(int a) {
|
||||
@ -502,11 +498,6 @@ This theoretically may help improve twolf slightly (used in dimbox.c:142?).
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
|
||||
registers, to generate better spill code.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
int foo(int N, int ***W, int **TK, int X) {
|
||||
int t, i;
|
||||
|
||||
@ -524,32 +515,6 @@ http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Altivec support. The first example should be a single lvx from the constant pool; the
|
||||
second should be a xor/stvx:
|
||||
|
||||
void foo(void) {
|
||||
int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||
bar (x);
|
||||
}
|
||||
|
||||
#include <string.h>
|
||||
void foo(void) {
|
||||
int x[8] __attribute__((aligned(128)));
|
||||
memset (x, 0, sizeof (x));
|
||||
bar (x);
|
||||
}
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
|
||||
|
||||
We need to codegen a -0.0 vector efficiently (no constant pool load).
|
||||
|
||||
When -ffast-math is on, we can use 0.0.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
float foo(float X) { return (int)(X); }
|
||||
|
||||
Currently produces:
|
||||
@ -569,16 +534,6 @@ We could use a target dag combine to turn the lwz/extsw into an lwa when the
|
||||
lwz has a single use. Since LWA is cracked anyway, this would be a codesize
|
||||
win only.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
Consider this:
|
||||
v4f32 Vector;
|
||||
v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
|
||||
|
||||
Since we know that "Vector" is 16-byte aligned and we know the element offset
|
||||
of ".X", we should change the load into an lve*x instruction, instead of doing
|
||||
a load/store/lve*x sequence.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
We generate ugly code for this:
|
||||
@ -596,8 +551,3 @@ void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
There is a wide range of vector constants we can generate with combinations of
|
||||
altivec instructions. For example, GCC does: t=vsplti*, r = t+t.
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
|
54
lib/Target/PowerPC/README_ALTIVEC.txt
Normal file
54
lib/Target/PowerPC/README_ALTIVEC.txt
Normal file
@ -0,0 +1,54 @@
|
||||
//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
|
||||
|
||||
Implement TargetConstantVec, and set up PPC to custom lower ConstantVec into
|
||||
TargetConstantVec's if it's one of the many forms that are algorithmically
|
||||
computable using the spiffy altivec instructions.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
|
||||
registers, to generate better spill code.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
Altivec support. The first example should be a single lvx from the constant pool; the
|
||||
second should be a xor/stvx:
|
||||
|
||||
void foo(void) {
|
||||
int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 1, 1, 1, 1, 1 };
|
||||
bar (x);
|
||||
}
|
||||
|
||||
#include <string.h>
|
||||
void foo(void) {
|
||||
int x[8] __attribute__((aligned(128)));
|
||||
memset (x, 0, sizeof (x));
|
||||
bar (x);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
|
||||
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
|
||||
|
||||
We need to codegen a -0.0 vector efficiently (no constant pool load).
|
||||
|
||||
When -ffast-math is on, we can use 0.0.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
Consider this:
|
||||
v4f32 Vector;
|
||||
v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
|
||||
|
||||
Since we know that "Vector" is 16-byte aligned and we know the element offset
|
||||
of ".X", we should change the load into an lve*x instruction, instead of doing
|
||||
a load/store/lve*x sequence.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
There is a wide range of vector constants we can generate with combinations of
|
||||
altivec instructions. For example, GCC does: t=vsplti*, r = t+t.
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
Loading…
Reference in New Issue
Block a user