added optimized case for signed division by 2

This commit is contained in:
Irmen de Jong 2022-07-24 13:20:38 +02:00
parent dcc1f00048
commit 046dceb5c2
6 changed files with 70 additions and 40 deletions

View File

@ -519,14 +519,39 @@ internal class ExpressionsAsmGen(private val program: Program,
val rightVal = expr.right.constValue(program)?.number?.toInt()
if(rightVal!=null && rightVal==2) {
translateExpressionInternal(expr.left)
// shifting only yields the correct rounded result on unsigned numbers
if(leftDt==DataType.UBYTE) {
asmgen.out(" lsr P8ESTACK_LO+1,x")
return
} else if(leftDt==DataType.UWORD) {
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
return
when (leftDt) {
DataType.UBYTE -> {
asmgen.out(" lsr P8ESTACK_LO+1,x")
}
DataType.UWORD -> {
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
}
DataType.BYTE -> {
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
asmgen.out("""
lda P8ESTACK_LO+1,x
bpl +
inc P8ESTACK_LO+1,x
lda P8ESTACK_LO+1,x
+ asl a
ror P8ESTACK_LO+1,x""")
}
DataType.WORD -> {
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
asmgen.out("""
lda P8ESTACK_HI+1,x
bpl ++
inc P8ESTACK_LO+1,x
bne +
inc P8ESTACK_HI+1,x
+ lda P8ESTACK_HI+1,x
+ asl a
ror P8ESTACK_HI+1,x
ror P8ESTACK_LO+1,x""")
}
else -> throw AssemblyError("weird dt")
}
return
}
}
}

View File

@ -512,11 +512,7 @@ class CodeGen(internal val program: PtProgram,
return code
val pow2 = powersOfTwo.indexOf(factor)
if(pow2==1 && !signed) {
// just shift 1 bit
code += if(signed)
VmCodeInstruction(Opcode.ASR, dt, reg1=reg)
else
VmCodeInstruction(Opcode.LSR, dt, reg1=reg)
code += VmCodeInstruction(Opcode.LSR, dt, reg1=reg) // simple single bit shift
}
else if(pow2>=1 &&!signed) {
// just shift multiple bits
@ -545,11 +541,7 @@ class CodeGen(internal val program: PtProgram,
return code
val pow2 = powersOfTwo.indexOf(factor)
if(pow2==1 && !signed) {
// just shift 1 bit
code += if(signed)
VmCodeInstruction(Opcode.ASRM, dt, value=address)
else
VmCodeInstruction(Opcode.LSRM, dt, value=address)
code += VmCodeInstruction(Opcode.LSRM, dt, value=address) // just simple bit shift
}
else if(pow2>=1 && !signed) {
// just shift multiple bits

View File

@ -21,6 +21,7 @@ import kotlin.math.pow
class ExpressionSimplifier(private val program: Program) : AstWalker() {
private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet()
private val negativePowersOfTwo = powersOfTwo.map { -it }.toSet()
override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> {
val mods = mutableListOf<IAstModification>()
@ -469,7 +470,9 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
}
in powersOfTwo -> {
if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
// unsigned number divided by a power of two => shift right
// Unsigned number divided by a power of two => shift right
// Signed number can't simply be bitshifted in this case (due to rounding issues for negative values),
// so we leave that as is and let the code generator deal with it.
val numshifts = log2(cv).toInt()
return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
}
@ -529,6 +532,14 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
}
}
in negativePowersOfTwo -> {
if (leftValue.inferType(program).isInteger) {
// times a negative power of two => negate, then shift
val numshifts = log2(-cv).toInt()
val negation = PrefixExpression("-", expr2.left, expr.position)
return BinaryExpression(negation, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
}
}
}
}
// no need to check for left val constant (because of associativity)

View File

@ -236,6 +236,11 @@ Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 fo
The signed integers are in the range -128..127 for bytes,
and -32768..32767 for words.
.. caution::
Doing math on signed integers can result in code that is a lot larger and slower than
when using unsigned integers. Make sure you really need the signed numbers, otherwise
stick to unsigned integers for efficiency.
Boolean values
^^^^^^^^^^^^^^

View File

@ -3,16 +3,12 @@ TODO
For next release
^^^^^^^^^^^^^^^^
- Add optimized signed word division for factors of 2 (bit shifting but this time with correct rounding)
CodeGen divideByConst() and divideByConstInplace()
ExpressionsAsmGen translateExpression()
ExpressionSimplifier optimizeDivision() ?
- add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16
(can be done on C64 only for now) Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does
Adapt the code in AstPreprocessor that relocates the registers as well.
- for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index)
DO NOT allow this for regular array indexing because normal arrays can never exceed size 256
...
@ -26,8 +22,7 @@ Need help with
Future Things and Ideas
^^^^^^^^^^^^^^^^^^^^^^^
Compiler:
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is.
this info is needed for more advanced optimizations and later code generation steps.
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is. This info is needed for more advanced optimizations and later code generation steps.
- vm: implement remaining sin/cos functions in math.p8
- vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now
- vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
@ -36,8 +31,8 @@ Compiler:
- vm: add more optimizations in VmPeepholeOptimizer
- see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition.
looping around is the currently documented behavior, but it's too easy to forget about!
Lot of work because of so many special cases in ForLoopsAsmgen.....
How is it for the vm target? -> just 2 special cases in CodeGen.
Lot of work because of so many special cases in ForLoopsAsmgen.....
How is it for the vm target? -> just 2 special cases in CodeGen.
- when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.

View File

@ -8,22 +8,24 @@ main {
txt.nl()
}
; TODO test with new optimized division routines.
sub start() {
byte qq = 1
byte bb = -51
derp((bb*qq)/-4, 1,2,3,4)
bb /= -4
txt.print_b(bb)
word qq = 1
word bb = -5051
derp((bb*qq)/-2, 1,2,3,4)
bb /= -2
txt.print_w(bb)
txt.nl()
bb = 51
bb /= -4
txt.print_b(bb)
bb = -5051
bb = -bb/2
txt.print_w(bb)
txt.nl()
ubyte ubb = 51
ubb /= 4
txt.print_ub(ubb)
bb = 5051
bb /= -2
txt.print_w(bb)
txt.nl()
uword ubb = 5051
ubb /= 2
txt.print_uw(ubb)
txt.nl()
}
}