mirror of
https://github.com/irmen/prog8.git
synced 2025-01-11 13:29:45 +00:00
added optimized case for signed division by 2
This commit is contained in:
parent
dcc1f00048
commit
046dceb5c2
@ -519,14 +519,39 @@ internal class ExpressionsAsmGen(private val program: Program,
|
||||
val rightVal = expr.right.constValue(program)?.number?.toInt()
|
||||
if(rightVal!=null && rightVal==2) {
|
||||
translateExpressionInternal(expr.left)
|
||||
// shifting only yields the correct rounded result on unsigned numbers
|
||||
if(leftDt==DataType.UBYTE) {
|
||||
asmgen.out(" lsr P8ESTACK_LO+1,x")
|
||||
return
|
||||
} else if(leftDt==DataType.UWORD) {
|
||||
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
|
||||
return
|
||||
when (leftDt) {
|
||||
DataType.UBYTE -> {
|
||||
asmgen.out(" lsr P8ESTACK_LO+1,x")
|
||||
}
|
||||
DataType.UWORD -> {
|
||||
asmgen.out(" lsr P8ESTACK_HI+1,x | ror P8ESTACK_LO+1,x")
|
||||
}
|
||||
DataType.BYTE -> {
|
||||
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
|
||||
asmgen.out("""
|
||||
lda P8ESTACK_LO+1,x
|
||||
bpl +
|
||||
inc P8ESTACK_LO+1,x
|
||||
lda P8ESTACK_LO+1,x
|
||||
+ asl a
|
||||
ror P8ESTACK_LO+1,x""")
|
||||
}
|
||||
DataType.WORD -> {
|
||||
// signed divide using shift needs adjusting of negative value to get correct rounding towards zero
|
||||
asmgen.out("""
|
||||
lda P8ESTACK_HI+1,x
|
||||
bpl ++
|
||||
inc P8ESTACK_LO+1,x
|
||||
bne +
|
||||
inc P8ESTACK_HI+1,x
|
||||
+ lda P8ESTACK_HI+1,x
|
||||
+ asl a
|
||||
ror P8ESTACK_HI+1,x
|
||||
ror P8ESTACK_LO+1,x""")
|
||||
}
|
||||
else -> throw AssemblyError("weird dt")
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -512,11 +512,7 @@ class CodeGen(internal val program: PtProgram,
|
||||
return code
|
||||
val pow2 = powersOfTwo.indexOf(factor)
|
||||
if(pow2==1 && !signed) {
|
||||
// just shift 1 bit
|
||||
code += if(signed)
|
||||
VmCodeInstruction(Opcode.ASR, dt, reg1=reg)
|
||||
else
|
||||
VmCodeInstruction(Opcode.LSR, dt, reg1=reg)
|
||||
code += VmCodeInstruction(Opcode.LSR, dt, reg1=reg) // simple single bit shift
|
||||
}
|
||||
else if(pow2>=1 &&!signed) {
|
||||
// just shift multiple bits
|
||||
@ -545,11 +541,7 @@ class CodeGen(internal val program: PtProgram,
|
||||
return code
|
||||
val pow2 = powersOfTwo.indexOf(factor)
|
||||
if(pow2==1 && !signed) {
|
||||
// just shift 1 bit
|
||||
code += if(signed)
|
||||
VmCodeInstruction(Opcode.ASRM, dt, value=address)
|
||||
else
|
||||
VmCodeInstruction(Opcode.LSRM, dt, value=address)
|
||||
code += VmCodeInstruction(Opcode.LSRM, dt, value=address) // just simple bit shift
|
||||
}
|
||||
else if(pow2>=1 && !signed) {
|
||||
// just shift multiple bits
|
||||
|
@ -21,6 +21,7 @@ import kotlin.math.pow
|
||||
|
||||
class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
||||
private val powersOfTwo = (1..16).map { (2.0).pow(it) }.toSet()
|
||||
private val negativePowersOfTwo = powersOfTwo.map { -it }.toSet()
|
||||
|
||||
override fun after(typecast: TypecastExpression, parent: Node): Iterable<IAstModification> {
|
||||
val mods = mutableListOf<IAstModification>()
|
||||
@ -469,7 +470,9 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
||||
}
|
||||
in powersOfTwo -> {
|
||||
if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
|
||||
// unsigned number divided by a power of two => shift right
|
||||
// Unsigned number divided by a power of two => shift right
|
||||
// Signed number can't simply be bitshifted in this case (due to rounding issues for negative values),
|
||||
// so we leave that as is and let the code generator deal with it.
|
||||
val numshifts = log2(cv).toInt()
|
||||
return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
@ -529,6 +532,14 @@ class ExpressionSimplifier(private val program: Program) : AstWalker() {
|
||||
return BinaryExpression(expr2.left, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
}
|
||||
in negativePowersOfTwo -> {
|
||||
if (leftValue.inferType(program).isInteger) {
|
||||
// times a negative power of two => negate, then shift
|
||||
val numshifts = log2(-cv).toInt()
|
||||
val negation = PrefixExpression("-", expr2.left, expr.position)
|
||||
return BinaryExpression(negation, "<<", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// no need to check for left val constant (because of associativity)
|
||||
|
@ -236,6 +236,11 @@ Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 fo
|
||||
The signed integers are in the range -128..127 for bytes,
|
||||
and -32768..32767 for words.
|
||||
|
||||
.. caution::
|
||||
Doing math on signed integers can result in code that is a lot larger and slower than
|
||||
when using unsigned integers. Make sure you really need the signed numbers, otherwise
|
||||
stick to unsigned integers for efficiency.
|
||||
|
||||
|
||||
Boolean values
|
||||
^^^^^^^^^^^^^^
|
||||
|
@ -3,16 +3,12 @@ TODO
|
||||
|
||||
For next release
|
||||
^^^^^^^^^^^^^^^^
|
||||
- Add optimized signed word division for factors of 2 (bit shifting but this time with correct rounding)
|
||||
CodeGen divideByConst() and divideByConstInplace()
|
||||
ExpressionsAsmGen translateExpression()
|
||||
ExpressionSimplifier optimizeDivision() ?
|
||||
|
||||
- add item to XyzZeropage that enables an option that if zeropage=FULL or KERNALSAFE, moves the cx16 virtual registers to ZP, same location as on x16
|
||||
(can be done on C64 only for now) Remove those addresses from the ZP free pool = allocate them in ZP like Cx16Zeropage does
|
||||
Adapt the code in AstPreprocessor that relocates the registers as well.
|
||||
- for uword pointer variables: allow pointer[uword] array indexing >255 , rewrite it to @(pointer+index)
|
||||
DO NOT allow this for regular array indexing because normal arrays can never exceed size 256
|
||||
|
||||
...
|
||||
|
||||
|
||||
@ -26,8 +22,7 @@ Need help with
|
||||
Future Things and Ideas
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Compiler:
|
||||
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is.
|
||||
this info is needed for more advanced optimizations and later code generation steps.
|
||||
- vm Instruction needs to know what the read-registers/memory are, and what the write-register/memory is. This info is needed for more advanced optimizations and later code generation steps.
|
||||
- vm: implement remaining sin/cos functions in math.p8
|
||||
- vm: find a solution for the cx16.r0..r15 that "overlap" (r0, r0L, r0H etc) but in the vm each get their own separate variable location now
|
||||
- vm: somehow deal with asmsubs otherwise the vm IR can't fully encode all of prog8
|
||||
@ -36,8 +31,8 @@ Compiler:
|
||||
- vm: add more optimizations in VmPeepholeOptimizer
|
||||
- see if we can let for loops skip the loop if end<start, like other programming languages. Without adding a lot of code size/duplicating the loop condition.
|
||||
this is documented behavior to now loop around but it's too easy to forget about!
|
||||
Lot of work because of so many special cases in ForLoopsAsmgen.....
|
||||
How is it for the vm target? -> just 2 special cases in CodeGen.
|
||||
Lot of work because of so many special cases in ForLoopsAsmgen.....
|
||||
How is it for the vm target? -> just 2 special cases in CodeGen.
|
||||
- when the vm is stable and *if* its language can get promoted to prog8 IL, the variable allocation should be changed.
|
||||
It's now done before the vm code generation, but the IL should probably not depend on the allocations already performed.
|
||||
So the CodeGen doesn't do VariableAlloc *before* the codegen, but as a last step.
|
||||
|
@ -8,22 +8,24 @@ main {
|
||||
txt.nl()
|
||||
}
|
||||
|
||||
; TODO test with new optimized division routines.
|
||||
|
||||
sub start() {
|
||||
byte qq = 1
|
||||
byte bb = -51
|
||||
derp((bb*qq)/-4, 1,2,3,4)
|
||||
bb /= -4
|
||||
txt.print_b(bb)
|
||||
word qq = 1
|
||||
word bb = -5051
|
||||
derp((bb*qq)/-2, 1,2,3,4)
|
||||
bb /= -2
|
||||
txt.print_w(bb)
|
||||
txt.nl()
|
||||
bb = 51
|
||||
bb /= -4
|
||||
txt.print_b(bb)
|
||||
bb = -5051
|
||||
bb = -bb/2
|
||||
txt.print_w(bb)
|
||||
txt.nl()
|
||||
ubyte ubb = 51
|
||||
ubb /= 4
|
||||
txt.print_ub(ubb)
|
||||
bb = 5051
|
||||
bb /= -2
|
||||
txt.print_w(bb)
|
||||
txt.nl()
|
||||
uword ubb = 5051
|
||||
ubb /= 2
|
||||
txt.print_uw(ubb)
|
||||
txt.nl()
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user