added optimized case for signed division by 2

This commit is contained in:
Irmen de Jong 2022-07-24 13:20:38 +02:00
parent dcc1f00048
commit 046dceb5c2
6 changed files with 70 additions and 40 deletions

View File

@ -519,14 +519,39 @@ internal class ExpressionsAsmGen(private val program: Program,
val rightVal = expr.right.constValue(program)?.number?.toInt() val rightVal = expr.right.constValue(program)?.number?.toInt()
if(rightVal!=null && rightVal==2) { if(rightVal!=null && rightVal==2) {
translateExpressionInternal(expr.left) translateExpressionInternal(expr.left)
// shifting only yields the correct rounded result on unsigned numbers when (leftDt) {
if(leftDt==DataType.UBYTE) { DataType.UBYTE -> {
asmgen.out(" lsr P8ESTACK_LO+1,x") asmgen.out(" lsr P8ESTACK_LO+1,x")
return }
} else if(leftDt==DataType.UWORD) { DataType.UWORD -> {
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x") asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
return }
DataType.BYTE -> {
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
asmgen.out("""
lda P8ESTACK_LO+1,x
bpl +
inc P8ESTACK_LO+1,x
lda P8ESTACK_LO+1,x
+ asl a
ror P8ESTACK_LO+1,x""")
}
DataType.WORD -> {
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
asmgen.out("""
lda P8ESTACK_HI+1,x
bpl ++
inc P8ESTACK_LO+1,x
bne +
inc P8ESTACK_HI+1,x
+ lda P8ESTACK_HI+1,x
+ asl a
ror P8ESTACK_HI+1,x
ror P8ESTACK_LO+1,x""")
}
else -> throw AssemblyError("weird dt")
} }
return
} }
} }
} }

View File

@ -512,11 +512,7 @@ class CodeGen(internal val program: PtProgram,
return code return code
val pow2 = powersOfTwo.indexOf(factor) val pow2 = powersOfTwo.indexOf(factor)
if(pow2==1 && !signed) { if(pow2==1 && !signed) {
// just shift 1 bit code += VmCodeInstruction(Opcode.LSR, dt, reg1=reg) // simple single bit shift
code += if(signed)
VmCodeInstruction(Opcode.ASR, dt, reg1=reg)
else
VmCodeInstruction(Opcode.LSR, dt, reg1=reg)
} }
else if(pow2>=1 &&!signed) { else if(pow2>=1 &&!signed) {
// just shift multiple bits // just shift multiple bits
@ -545,11 +541,7 @@ class CodeGen(internal val program: PtProgram,
return code return code
val pow2 = powersOfTwo.indexOf(factor) val pow2 = powersOfTwo.indexOf(factor)
if(pow2==1 && !signed) { if(pow2==1 && !signed) {
// just shift 1 bit code += VmCodeInstruction(Opcode.LSRM, dt, value=address) // just simple bit shift
code += if(signed)
VmCodeInstruction(Opcode.ASRM, dt, value=address)
else
VmCodeInstruction(Opcode.LSRM, dt, value=address)
} }
else if(pow2>=1 && !signed) { else if(pow2>=1 && !signed) {
// just shift multiple bits // just shift multiple bits

View File

@ -21,6 +21,7 @@ import kotlin.math.pow
class ExpressionSimplifier(private val program: Program) : AstWalker() { class ExpressionSimplifier(private val program: Program) : AstWalker() {
private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet() private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet()
private val negativePowersOfTwo = powersOfTwo.map { -it }.toSet()
override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> { override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> {
val mods = mutableListOf<IAstModification>() val mods = mutableListOf<IAstModification>()
@ -469,7 +470,9 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
} }
in powersOfTwo -> { in powersOfTwo -> {
if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) { if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
// unsigned number divided by a power of two => shift right // Unsigned number divided by a power of two => shift right
// Signed number can't simply be bitshifted in this case (due to rounding issues for negative values),
// so we leave that as is and let the code generator deal with it.
val numshifts = log2(cv).toInt() val numshifts = log2(cv).toInt()
return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position) return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
} }
@ -529,6 +532,14 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position) return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
} }
} }
in negativePowersOfTwo -> {
if (leftValue.inferType(program).isInteger) {
// times a negative power of two => negate, then shift
val numshifts = log2(-cv).toInt()
val negation = PrefixExpression("-", expr2.left, expr.position)
return BinaryExpression(negation, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
}
}
} }
} }
// no need to check for left val constant (because of associativity) // no need to check for left val constant (because of associativity)

View File

@ -236,6 +236,11 @@ Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 fo
The signed integers are in the range -128..127 for bytes, The signed integers are in the range -128..127 for bytes,
and -32768..32767 for words. and -32768..32767 for words.
.. caution::
Doing math on signed integers can result in code that is a lot larger and slower than
when using unsigned integers. Make sure you really need the signed numbers, otherwise
stick to unsigned integers for efficiency.
Boolean values Boolean values
^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^

View File

@ -3,16 +3,12 @@ TODO
For next release For next release
^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^
- Add optimized signed word division for factors of 2 (bit shifting but this time with correct rounding)
CodeGen divideByConst() and divideByConstInplace()
ExpressionsAsmGen translateExpression()
ExpressionSimplifier optimizeDivision() ?
- add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16 - add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16
(can be done on C64 only for now) Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does (can be done on C64 only for now) Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does
Adapt the code in AstPreprocessor that relocates the registers as well. Adapt the code in AstPreprocessor that relocates the registers as well.
- for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index) - for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index)
DO NOT allow this for regular array indexing because normal arrays can never exceed size 256 DO NOT allow this for regular array indexing because normal arrays can never exceed size 256
... ...
@ -26,8 +22,7 @@ Need help with
Future Things and Ideas Future Things and Ideas
^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^
Compiler: Compiler:
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is. - vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is. This info is needed for more advanced optimizations and later code generation steps.
this info is needed for more advanced optimizations and later code generation steps.
- vm: implement remaining sin/cos functions in math.p8 - vm: implement remaining sin/cos functions in math.p8
- vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now - vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now
- vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8 - vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
@ -36,8 +31,8 @@ Compiler:
- vm: add more optimizations in VmPeepholeOptimizer - vm: add more optimizations in VmPeepholeOptimizer
- see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition. - see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition.
this is documented behavior to now loop around but it's too easy to forget about! this is documented behavior to now loop around but it's too easy to forget about!
Lot of work because of so many special cases in ForLoopsAsmgen..... Lot of work because of so many special cases in ForLoopsAsmgen.....
How is it for the vm target? -> just 2 special cases in CodeGen. How is it for the vm target? -> just 2 special cases in CodeGen.
- when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed. - when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed. It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step. So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.

View File

@ -8,22 +8,24 @@ main {
txt.nl() txt.nl()
} }
; TODO test with new optimized division routines.
sub start() { sub start() {
byte qq = 1 word qq = 1
byte bb = -51 word bb = -5051
derp((bb*qq)/-4, 1,2,3,4) derp((bb*qq)/-2, 1,2,3,4)
bb /= -4 bb /= -2
txt.print_b(bb) txt.print_w(bb)
txt.nl() txt.nl()
bb = 51 bb = -5051
bb /= -4 bb = -bb/2
txt.print_b(bb) txt.print_w(bb)
txt.nl() txt.nl()
ubyte ubb = 51 bb = 5051
ubb /= 4 bb /= -2
txt.print_ub(ubb) txt.print_w(bb)
txt.nl()
uword ubb = 5051
ubb /= 2
txt.print_uw(ubb)
txt.nl() txt.nl()
} }
} }