mirror of
https://github.com/irmen/prog8.git
synced 2024-12-24 16:29:21 +00:00
added optimized case for signed division by 2
This commit is contained in:
parent
dcc1f00048
commit
046dceb5c2
@ -519,14 +519,39 @@ internal class ExpressionsAsmGen(private val program: Program,
|
|||||||
val rightVal = expr.right.constValue(program)?.number?.toInt()
|
val rightVal = expr.right.constValue(program)?.number?.toInt()
|
||||||
if(rightVal!=null && rightVal==2) {
|
if(rightVal!=null && rightVal==2) {
|
||||||
translateExpressionInternal(expr.left)
|
translateExpressionInternal(expr.left)
|
||||||
// shifting only yields the correct rounded result on unsigned numbers
|
when (leftDt) {
|
||||||
if(leftDt==DataType.UBYTE) {
|
DataType.UBYTE -> {
|
||||||
asmgen.out(" lsr P8ESTACK_LO+1,x")
|
asmgen.out(" lsr P8ESTACK_LO+1,x")
|
||||||
return
|
}
|
||||||
} else if(leftDt==DataType.UWORD) {
|
DataType.UWORD -> {
|
||||||
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
|
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
|
||||||
return
|
}
|
||||||
|
DataType.BYTE -> {
|
||||||
|
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
|
||||||
|
asmgen.out("""
|
||||||
|
lda P8ESTACK_LO+1,x
|
||||||
|
bpl +
|
||||||
|
inc P8ESTACK_LO+1,x
|
||||||
|
lda P8ESTACK_LO+1,x
|
||||||
|
+ asl a
|
||||||
|
ror P8ESTACK_LO+1,x""")
|
||||||
|
}
|
||||||
|
DataType.WORD -> {
|
||||||
|
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
|
||||||
|
asmgen.out("""
|
||||||
|
lda P8ESTACK_HI+1,x
|
||||||
|
bpl ++
|
||||||
|
inc P8ESTACK_LO+1,x
|
||||||
|
bne +
|
||||||
|
inc P8ESTACK_HI+1,x
|
||||||
|
+ lda P8ESTACK_HI+1,x
|
||||||
|
+ asl a
|
||||||
|
ror P8ESTACK_HI+1,x
|
||||||
|
ror P8ESTACK_LO+1,x""")
|
||||||
|
}
|
||||||
|
else -> throw AssemblyError("weird dt")
|
||||||
}
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -512,11 +512,7 @@ class CodeGen(internal val program: PtProgram,
|
|||||||
return code
|
return code
|
||||||
val pow2 = powersOfTwo.indexOf(factor)
|
val pow2 = powersOfTwo.indexOf(factor)
|
||||||
if(pow2==1 && !signed) {
|
if(pow2==1 && !signed) {
|
||||||
// just shift 1 bit
|
code += VmCodeInstruction(Opcode.LSR, dt, reg1=reg) // simple single bit shift
|
||||||
code += if(signed)
|
|
||||||
VmCodeInstruction(Opcode.ASR, dt, reg1=reg)
|
|
||||||
else
|
|
||||||
VmCodeInstruction(Opcode.LSR, dt, reg1=reg)
|
|
||||||
}
|
}
|
||||||
else if(pow2>=1 &&!signed) {
|
else if(pow2>=1 &&!signed) {
|
||||||
// just shift multiple bits
|
// just shift multiple bits
|
||||||
@ -545,11 +541,7 @@ class CodeGen(internal val program: PtProgram,
|
|||||||
return code
|
return code
|
||||||
val pow2 = powersOfTwo.indexOf(factor)
|
val pow2 = powersOfTwo.indexOf(factor)
|
||||||
if(pow2==1 && !signed) {
|
if(pow2==1 && !signed) {
|
||||||
// just shift 1 bit
|
code += VmCodeInstruction(Opcode.LSRM, dt, value=address) // just simple bit shift
|
||||||
code += if(signed)
|
|
||||||
VmCodeInstruction(Opcode.ASRM, dt, value=address)
|
|
||||||
else
|
|
||||||
VmCodeInstruction(Opcode.LSRM, dt, value=address)
|
|
||||||
}
|
}
|
||||||
else if(pow2>=1 && !signed) {
|
else if(pow2>=1 && !signed) {
|
||||||
// just shift multiple bits
|
// just shift multiple bits
|
||||||
|
@ -21,6 +21,7 @@ import kotlin.math.pow
|
|||||||
|
|
||||||
class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
||||||
private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet()
|
private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet()
|
||||||
|
private val negativePowersOfTwo = powersOfTwo.map { -it }.toSet()
|
||||||
|
|
||||||
override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> {
|
override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> {
|
||||||
val mods = mutableListOf<IAstModification>()
|
val mods = mutableListOf<IAstModification>()
|
||||||
@ -469,7 +470,9 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
|||||||
}
|
}
|
||||||
in powersOfTwo -> {
|
in powersOfTwo -> {
|
||||||
if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
|
if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
|
||||||
// unsigned number divided by a power of two => shift right
|
// Unsigned number divided by a power of two => shift right
|
||||||
|
// Signed number can't simply be bitshifted in this case (due to rounding issues for negative values),
|
||||||
|
// so we leave that as is and let the code generator deal with it.
|
||||||
val numshifts = log2(cv).toInt()
|
val numshifts = log2(cv).toInt()
|
||||||
return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
||||||
}
|
}
|
||||||
@ -529,6 +532,14 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
|||||||
return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
in negativePowersOfTwo -> {
|
||||||
|
if (leftValue.inferType(program).isInteger) {
|
||||||
|
// times a negative power of two => negate, then shift
|
||||||
|
val numshifts = log2(-cv).toInt()
|
||||||
|
val negation = PrefixExpression("-", expr2.left, expr.position)
|
||||||
|
return BinaryExpression(negation, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// no need to check for left val constant (because of associativity)
|
// no need to check for left val constant (because of associativity)
|
||||||
|
@ -236,6 +236,11 @@ Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 fo
|
|||||||
The signed integers are in the range -128..127 for bytes,
|
The signed integers are in the range -128..127 for bytes,
|
||||||
and -32768..32767 for words.
|
and -32768..32767 for words.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
Doing math on signed integers can result in code that is a lot larger and slower than
|
||||||
|
when using unsigned integers. Make sure you really need the signed numbers, otherwise
|
||||||
|
stick to unsigned integers for efficiency.
|
||||||
|
|
||||||
|
|
||||||
Boolean values
|
Boolean values
|
||||||
^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^
|
||||||
|
@ -3,16 +3,12 @@ TODO
|
|||||||
|
|
||||||
For next release
|
For next release
|
||||||
^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^
|
||||||
- Add optimized signed word division for factors of 2 (bit shifting but this time with correct rounding)
|
|
||||||
CodeGen divideByConst() and divideByConstInplace()
|
|
||||||
ExpressionsAsmGen translateExpression()
|
|
||||||
ExpressionSimplifier optimizeDivision() ?
|
|
||||||
|
|
||||||
- add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16
|
- add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16
|
||||||
(can be done on C64 only for now) Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does
|
(can be done on C64 only for now) Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does
|
||||||
Adapt the code in AstPreprocessor that relocates the registers as well.
|
Adapt the code in AstPreprocessor that relocates the registers as well.
|
||||||
- for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index)
|
- for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index)
|
||||||
DO NOT allow this for regular array indexing because normal arrays can never exceed size 256
|
DO NOT allow this for regular array indexing because normal arrays can never exceed size 256
|
||||||
|
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
@ -26,8 +22,7 @@ Need help with
|
|||||||
Future Things and Ideas
|
Future Things and Ideas
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
Compiler:
|
Compiler:
|
||||||
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is.
|
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is. This info is needed for more advanced optimizations and later code generation steps.
|
||||||
this info is needed for more advanced optimizations and later code generation steps.
|
|
||||||
- vm: implement remaining sin/cos functions in math.p8
|
- vm: implement remaining sin/cos functions in math.p8
|
||||||
- vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now
|
- vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now
|
||||||
- vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
|
- vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
|
||||||
@ -36,8 +31,8 @@ Compiler:
|
|||||||
- vm: add more optimizations in VmPeepholeOptimizer
|
- vm: add more optimizations in VmPeepholeOptimizer
|
||||||
- see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition.
|
- see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition.
|
||||||
this is documented behavior to now loop around but it's too easy to forget about!
|
this is documented behavior to now loop around but it's too easy to forget about!
|
||||||
Lot of work because of so many special cases in ForLoopsAsmgen.....
|
Lot of work because of so many special cases in ForLoopsAsmgen.....
|
||||||
How is it for the vm target? -> just 2 special cases in CodeGen.
|
How is it for the vm target? -> just 2 special cases in CodeGen.
|
||||||
- when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
|
- when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
|
||||||
It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
|
It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
|
||||||
So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.
|
So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.
|
||||||
|
@ -8,22 +8,24 @@ main {
|
|||||||
txt.nl()
|
txt.nl()
|
||||||
}
|
}
|
||||||
|
|
||||||
; TODO test with new optimized division routines.
|
|
||||||
|
|
||||||
sub start() {
|
sub start() {
|
||||||
byte qq = 1
|
word qq = 1
|
||||||
byte bb = -51
|
word bb = -5051
|
||||||
derp((bb*qq)/-4, 1,2,3,4)
|
derp((bb*qq)/-2, 1,2,3,4)
|
||||||
bb /= -4
|
bb /= -2
|
||||||
txt.print_b(bb)
|
txt.print_w(bb)
|
||||||
txt.nl()
|
txt.nl()
|
||||||
bb = 51
|
bb = -5051
|
||||||
bb /= -4
|
bb = -bb/2
|
||||||
txt.print_b(bb)
|
txt.print_w(bb)
|
||||||
txt.nl()
|
txt.nl()
|
||||||
ubyte ubb = 51
|
bb = 5051
|
||||||
ubb /= 4
|
bb /= -2
|
||||||
txt.print_ub(ubb)
|
txt.print_w(bb)
|
||||||
|
txt.nl()
|
||||||
|
uword ubb = 5051
|
||||||
|
ubb /= 2
|
||||||
|
txt.print_uw(ubb)
|
||||||
txt.nl()
|
txt.nl()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user