mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-15 20:29:48 +00:00
Add some observations from CoreGraphics benchmark. Remove register
scavenging todo item, since it is now implemented. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35044 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
349ec1c11b
commit
a6bc6fc170
@ -140,6 +140,29 @@ odd/even pair. However, we probably would pay a penalty if the address is not
|
||||
aligned on an 8-byte boundary. This requires more information on load / store
|
||||
nodes (and MI's?) than we currently carry.
|
||||
|
||||
6) (From CoreGraphics): struct copies appear to be done field by field
|
||||
instead of by words, at least sometimes:
|
||||
|
||||
struct foo { int x; short s; char c1; char c2; };
|
||||
void cpy(struct foo*a, struct foo*b) { *a = *b; }
|
||||
|
||||
llvm code (-O2)
|
||||
ldrb r3, [r1, #+6]
|
||||
ldr r2, [r1]
|
||||
ldrb r12, [r1, #+7]
|
||||
ldrh r1, [r1, #+4]
|
||||
str r2, [r0]
|
||||
strh r1, [r0, #+4]
|
||||
strb r3, [r0, #+6]
|
||||
strb r12, [r0, #+7]
|
||||
gcc code (-O2)
|
||||
ldmia r1, {r1-r2}
|
||||
stmia r0, {r1-r2}
|
||||
|
||||
In this benchmark poor handling of aggregate copies has shown up as
|
||||
having a large effect on size, and possibly speed as well (we don't have
|
||||
a good way to measure on ARM).
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
* Consider this silly example:
|
||||
@ -282,53 +305,8 @@ See McCat/18-imp/ComputeBoundingBoxes for an example.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We need register scavenging. Currently, the 'ip' register is reserved in case
|
||||
frame indexes are too big. This means that we generate extra code for stuff
|
||||
like this:
|
||||
|
||||
void foo(unsigned x, unsigned y, unsigned z, unsigned *a, unsigned *b, unsigned *c) {
|
||||
short Rconst = (short) (16384.0f * 1.40200 + 0.5 );
|
||||
*a = x * Rconst;
|
||||
*b = y * Rconst;
|
||||
*c = z * Rconst;
|
||||
}
|
||||
|
||||
we compile it to:
|
||||
|
||||
_foo:
|
||||
*** stmfd sp!, {r4, r7}
|
||||
*** add r7, sp, #4
|
||||
mov r4, #186
|
||||
orr r4, r4, #89, 24 @ 22784
|
||||
mul r0, r0, r4
|
||||
str r0, [r3]
|
||||
mul r0, r1, r4
|
||||
ldr r1, [sp, #+8]
|
||||
str r0, [r1]
|
||||
mul r0, r2, r4
|
||||
ldr r1, [sp, #+12]
|
||||
str r0, [r1]
|
||||
*** sub sp, r7, #4
|
||||
*** ldmfd sp!, {r4, r7}
|
||||
bx lr
|
||||
|
||||
GCC produces:
|
||||
|
||||
_foo:
|
||||
ldr ip, L4
|
||||
mul r0, ip, r0
|
||||
mul r1, ip, r1
|
||||
str r0, [r3, #0]
|
||||
ldr r3, [sp, #0]
|
||||
mul r2, ip, r2
|
||||
str r1, [r3, #0]
|
||||
ldr r3, [sp, #4]
|
||||
str r2, [r3, #0]
|
||||
bx lr
|
||||
L4:
|
||||
.long 22970
|
||||
|
||||
This is apparently all because we couldn't use ip here.
|
||||
Register scavenging is now implemented. The example in the previous version
|
||||
of this document produces optimal code at -O2.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
@ -449,3 +427,25 @@ http://www.inf.u-szeged.hu/gcc-arm/
|
||||
http://citeseer.ist.psu.edu/debus04linktime.html
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
(CoreGraphics): gcc generates smaller code for this function at -O2 or -Os:
|
||||
|
||||
void foo(signed char* p) {
|
||||
if (*p == 3)
|
||||
bar();
|
||||
else if (*p == 4)
|
||||
baz();
|
||||
else if (*p == 5)
|
||||
quux();
|
||||
}
|
||||
|
||||
llvm decides it's a good idea to turn the repeated if...else into a
|
||||
binary tree, as if it were a switch; the resulting code requires -1
|
||||
compare-and-branches when *p<=2 or *p==5, the same number if *p==4
|
||||
or *p>=6, and +1 if *p==3. So it should be a speed win
|
||||
(on balance). However, the revised code is larger, with 4 conditional
|
||||
branches instead of 3.
|
||||
|
||||
More seriously, there is a byte->word extend before
|
||||
each comparison, where there should be only one, and the condition codes
|
||||
are not remembered when the same two values are compared twice.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user