mirror of
https://github.com/autc04/Retro68.git
synced 2024-11-24 23:32:06 +00:00
217 lines
3.3 KiB
ArmAsm
217 lines
3.3 KiB
ArmAsm
# D30V parallel optimization test
|
|
# assemble with "-O"
|
|
|
|
.text
|
|
# Entry label for the test sequence.  Everything below is a list of
# adjacent instruction pairs fed to the assembler's parallel optimizer
# (the file header says to assemble with "-O").
start:
|
|
# Two abs instructions whose register operands are disjoint
# (r1,r2 versus r3,r4).
# NOTE(review): presumably expected to be packed in parallel -- confirm
# against the expected-output dump for this test.
abs r1,r2
|
|
abs r3,r4
|
|
|
|
# Two notfg instructions naming different flag operands
# (f0,f4 versus f1,f2).
notfg f0,f4
|
|
notfg f1,f2
|
|
|
|
# A register instruction followed by a flag instruction; no operand
# appears in both.
abs r1,r2
|
|
notfg f1,f2
|
|
|
|
# both change C flag
# NOTE(review): the expected packing for this pair is not stated here;
# check the expected-output dump.
|
|
add r1,r2,r3
|
|
notfg C,f0
|
|
|
|
# one uses and one changes C flag
# (here C is the second operand of notfg rather than the first)
|
|
add r1,r2,r3
|
|
notfg f0,C
|
|
|
|
# Branch/abs ordering cases.  The branch target `.` is the GAS
# current-location symbol.
# NOTE(review): the expected serial/parallel outcome for each case is
# not stated in this file -- confirm against the expected-output dump.

# Case 1: bra comes first, abs second.
bra .
|
|
abs r1,r2
|
|
|
|
# Case 2: abs comes first, bra second.
abs r1,r2
|
|
bra .
|
|
|
|
# Case 3: bsr comes first, abs second.
bsr .
|
|
abs r1,r2
|
|
|
|
# Case 4: two identical abs instructions followed by a bsr (three
# instructions, so the pairing has to be chosen by the assembler).
abs r1,r2
|
|
abs r1,r2
|
|
bsr .
|
|
|
|
# Byte load/store pairs in all four orderings.  The two instructions of
# each pair name disjoint registers (r1,r2,r3 versus r7,r8,r9).
# NOTE(review): the expected serial/parallel outcome is not stated
# here -- confirm against the expected-output dump.

# ldb then stb
ldb r1,@(r2,r3)
|
|
stb r7,@(r8,r9)
|
|
|
|
# stb then ldb
stb r7,@(r8,r9)
|
|
ldb r1,@(r2,r3)
|
|
|
|
# ldb then ldb
ldb r7,@(r8,r9)
|
|
ldb r1,@(r2,r3)
|
|
|
|
# stb then stb
stb r7,@(r8,r9)
|
|
stb r1,@(r2,r3)
|
|
|
|
# add names r3 as its first operand and the following stw uses r3 in
# its address operand.
# NOTE(review): presumably a write-then-use dependency on r3 that keeps
# the pair serial -- confirm.
add r3, r3, r6
|
|
stw r2, @(r3, 0)
|
|
|
|
# should be serial because of conditional execution
# Each pair is a cmple naming a flag (f0 or f1) as its first operand,
# followed by a jmp carrying a condition suffix.  The suffix varies from
# pair to pair: tx, fx, xt, xf, tt, tf.
# NOTE(review): the suffixes look like condition codes tested against
# the flag registers -- confirm their exact meaning in the D30V manual.
|
|
cmple f0,r4,r5
|
|
jmp/tx 0x0
|
|
|
|
cmple f0,r4,r5
|
|
jmp/fx 0x0
|
|
|
|
cmple f0,r4,r5
|
|
jmp/xt 0x0
|
|
|
|
cmple f0,r4,r5
|
|
jmp/xf 0x0
|
|
|
|
cmple f0,r4,r5
|
|
jmp/tt 0x0
|
|
|
|
cmple f0,r4,r5
|
|
jmp/tf 0x0
|
|
|
|
# The last two pairs repeat the /tx and /xt cases with cmple naming f1
# instead of f0.
# NOTE(review): whether these are also expected to be serial is not
# stated here -- confirm against the expected-output dump.
cmple f1,r4,r5
|
|
jmp/tx 0x0
|
|
|
|
cmple f1,r4,r5
|
|
jmp/xt 0x0
|
|
|
|
# serial because of the r4 dependency
# (add names r4 as its first operand; cmple names r4 as an operand too)
|
|
add r4, r0, 1
|
|
cmple f0, r4, r5
|
|
|
|
# parallel
# (same add, but cmple now names r3 and r5, so r4 no longer appears in
# both instructions)
|
|
add r4, r0, 1
|
|
cmple f0, r3, r5
|
|
|
|
# Register-dependency cases for ld2w paired with adds.  Per the comments
# below, `ld2w r4,...` also loads r5 (presumably a two-register load
# into r4/r5 -- confirm in the D30V manual).

# serial because ld2w loads r5
# (adds names r5 as its first operand)
|
|
ld2w r4,@(r0,r6)
|
|
adds r5,r19,r20
|
|
|
|
# serial because ld2w loads r5
# (adds names r5 as its second operand)
|
|
ld2w r4,@(r0,r6)
|
|
adds r3,r5,r20
|
|
|
|
# parallel even though ld2w uses r6 and adds changes it
|
|
ld2w r4,@(r0,r6)
|
|
adds r6,r19,r20
|
|
|
|
# parallel
# (adds names r7, r19 and r20; none of them appear in the ld2w)
|
|
ld2w r4,@(r0,r6)
|
|
adds r7,r19,r20
|
|
|
|
# parallel
# (as above, except adds also names r0, which the ld2w address names)
|
|
ld2w r4,@(r0,r6)
|
|
adds r7,r0,r20
|
|
|
|
# Register-dependency cases for st2w paired with adds.  These mirror the
# ld2w cases with the same operands; per the comments below, every pair
# here is expected to run in parallel.

# parallel even though st2w uses r5 and adds modifies it
|
|
st2w r4,@(r0,r6)
|
|
adds r5,r19,r20
|
|
|
|
# parallel, both use but don't modify r5
|
|
st2w r4,@(r0,r6)
|
|
adds r3,r5,r20
|
|
|
|
# parallel even though st2w uses r6 and adds changes it
|
|
st2w r4,@(r0,r6)
|
|
adds r6,r19,r20
|
|
|
|
# parallel
# (adds names r7, r19 and r20; none of them appear in the st2w)
|
|
st2w r4,@(r0,r6)
|
|
adds r7,r19,r20
|
|
|
|
# parallel
# (as above, except adds also names r0, which the st2w address names)
|
|
st2w r4,@(r0,r6)
|
|
adds r7,r0,r20
|
|
|
|
# test memory dependencies
# In every pair below the two instructions name completely different
# registers, so only the memory accesses themselves can conflict.
|
|
|
|
# always serial because one could overwrite the other
# (two st2w stores)
|
|
st2w r10,@(r3,r4)
|
|
st2w r40,@(r43,r44)
|
|
|
|
# always serial
# (stw followed by ldw)
|
|
stw r1,@(r2,r3)
|
|
ldw r41,@(r42,r43)
|
|
|
|
# reads can happen in parallel but the current architecture
|
|
# doesn't support it
# (ldw followed by ldb)
|
|
ldw r1,@(r2,r3)
|
|
ldb r41,@(r42,r43)
|
|
|
|
# test post increment and decrement dependencies
# The base register r6 is written as `r6+` or `r6-` in the address
# operand of the memory instruction; adds names r6 as an operand.
|
|
|
|
# serial
# (ldw with r6+ comes first, then adds names r6)
|
|
ldw r4,@(r6+,r11)
|
|
adds r9,r6,2
|
|
|
|
# parallel, modification to r6 happens last
# (adds comes first, then ldw with r6-)
|
|
adds r9,r6,2
|
|
ldw r4,@(r6-,r11)
|
|
|
|
# serial
# (stw with r6- comes first, then adds names r6)
|
|
stw r4,@(r6-,r11)
|
|
adds r9,r6,2
|
|
|
|
# parallel
# (no + or - on r6, ldw first)
|
|
ldw r4,@(r6,r11)
|
|
adds r9,r6,2
|
|
|
|
# parallel
# (no + or - on r6, adds first)
|
|
adds r9,r6,2
|
|
ldw r4,@(r6,r11)
|
|
|
|
# if the first instruction is a jmp, don't parallelize
# The four pairs below put jmp, jsr, bra and bsr first, each followed
# by the same abs.  The target operand is the literal 0.
# NOTE(review): the comment above only names jmp; presumably the same
# rule is being checked for jsr, bra and bsr -- confirm against the
# expected-output dump.
|
|
jmp 0
|
|
abs r1,r2
|
|
|
|
jsr 0
|
|
abs r1,r2
|
|
|
|
# NOTE(review): the purpose of this alignment directive is not stated;
# presumably it puts the following pairs on a known boundary -- confirm.
.align 3
|
|
|
|
bra 0
|
|
abs r1,r2
|
|
|
|
bsr 0
|
|
abs r1,r2
|
|
|
|
# Explicitly prohibited from parallel execution.
|
|
# The labels are here to prevent instruction pairs
|
|
# from being merged with following pairs.
# Each labelled pair is a memory-access instruction followed by an
# arithmetic instruction; the two name disjoint registers.
# NOTE(review): the rule that prohibits each combination is not spelled
# out here -- see the D30V architecture manual.
|
|
|
|
# Pair 1: st2w followed by addhlll.
label1:
|
|
st2w r2, @(r2, r3)
|
|
addhlll r4, r5, r6
|
|
# Pair 2: st4hb followed by subhllh.
label2:
|
|
st4hb r8, @(r8, r9)
|
|
subhllh r10, r11, r12
|
|
# Pair 3: ld2w followed by mulhxhl.
label3:
|
|
ld2w r14, @(r14, r15)
|
|
mulhxhl r16, r17, r18
|
|
# Pair 4: ldw followed by mulx2h.
label4:
|
|
ldw r19, @(r20, r21)
|
|
mulx2h r22, r23, r24
|
|
# Pair 5: ldh followed by mul2h.
label5:
|
|
ldh r25, @(r26, r27)
|
|
mul2h r28, r29, r30
|
|
|
|
# Insertion of NOPs required to prevent pipeline clashes.
# NOTE(review): which of the sequences below actually need a NOP is not
# stated here -- confirm against the expected-output dump.
|
|
|
|
# Three instructions in a row: mul, mulhxll, then add.
label6:
|
|
mul r1,r2,r3
|
|
mulhxll r4,r5,r6
|
|
add r7, r8, r9
|
|
# Multiply / load combinations.
label7:
|
|
|
|
# mul followed by ldw on separate lines (ordering left to the assembler).
mul r2,r3,r4
|
|
ldw r5, @(r6,r0)
|
|
|
|
# Both instructions on one line joined by `<-`.
# NOTE(review): `<-` and `->` appear to be the D30V explicit
# sequential-order separators (`<-` = right-hand instruction first,
# `->` = left-hand instruction first) -- confirm in the GAS manual.
ldw r10, @(r11, r0) <- mul r7,r8,r9
|
|
|
|
# Both instructions on one line joined by `->`.
mul r12,r13,r14 -> ldw r15, @(r16, r0)
|
|
|
|
# Same shapes as the mul cases above, using mac1 and mac0.
mac1 r2,r3,r4
|
|
ldw r5, @(r6,r0)
|
|
|
|
# `<-` pair with mac0, followed by one more ldw on its own line.
ldw r10, @(r11, r0) <- mac0 r7,r8,r9
|
|
ldw r10, @(r11, r0)
|
|
|