optimize division by powers of 2 better (into bit shifts)

This commit is contained in:
Irmen de Jong 2024-07-21 20:42:48 +02:00
parent 0af17cdc33
commit 3681d6ee1c
7 changed files with 104 additions and 89 deletions

View File

@ -3,7 +3,7 @@ package prog8.code.core
import kotlin.math.abs
import kotlin.math.pow
val powersOfTwoFloat = (1..16).map { (2.0).pow(it) }.toTypedArray()
val powersOfTwoFloat = (0..16).map { (2.0).pow(it) }.toTypedArray()
val negativePowersOfTwoFloat = powersOfTwoFloat.map { -it }.toTypedArray()
val powersOfTwoInt = (0..16).map { 2.0.pow(it).toInt() }.toTypedArray()

View File

@ -933,11 +933,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
}
private fun optimizedDivideExpr(expr: PtBinaryExpression, target: AsmAssignTarget): Boolean {
val constDivisor = expr.right.asConstInteger()
if(constDivisor in powersOfTwoInt) {
println("TODO optimize: divide ${expr.type} by power-of-2 ${constDivisor} at ${expr.position}") // TODO
}
// replacing division by shifting is done in an optimizer step.
when(expr.type) {
DataType.UBYTE -> {
assignExpressionToRegister(expr.left, RegisterOrPair.A, false)

View File

@ -1477,9 +1477,7 @@ $shortcutLabel:""")
asmgen.out(" lda $name | ldy #$value | jsr math.multiply_bytes | sta $name")
}
"/" -> {
if(value in powersOfTwoInt) {
println("TODO optimize: (u)byte division by power-of-2 $value") // TODO
}
// replacing division by shifting is done in an optimizer step.
if (dt == DataType.UBYTE)
asmgen.out(" lda $name | ldy #$value | jsr math.divmod_ub_asm | sty $name")
else
@ -1828,36 +1826,36 @@ $shortcutLabel:""")
}
}
"/" -> {
if(value==0)
// replacing division by shifting is done in an optimizer step.
if(value==0) {
throw AssemblyError("division by zero")
else if (value in powersOfTwoInt) {
println("TODO optimize: (u)word division by power-of-2 $value") // TODO
}
if(dt==DataType.WORD) {
asmgen.out("""
lda $lsb
ldy $msb
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
lda #<$value
ldy #>$value
jsr math.divmod_w_asm
sta $lsb
sty $msb
""")
}
else {
asmgen.out("""
lda $lsb
ldy $msb
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
lda #<$value
ldy #>$value
jsr math.divmod_uw_asm
sta $lsb
sty $msb
""")
} else {
if(dt==DataType.WORD) {
asmgen.out("""
lda $lsb
ldy $msb
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
lda #<$value
ldy #>$value
jsr math.divmod_w_asm
sta $lsb
sty $msb
""")
}
else {
asmgen.out("""
lda $lsb
ldy $msb
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
lda #<$value
ldy #>$value
jsr math.divmod_uw_asm
sta $lsb
sty $msb
""")
}
}
}
"%" -> {

View File

@ -783,19 +783,31 @@ class IRCodeGen(
if(factor==1)
return code
val pow2 = powersOfTwoInt.indexOf(factor)
// TODO also try to optimize for signed division by powers of 2
if(pow2==1 && !signed) {
code += IRInstruction(Opcode.LSR, dt, reg1=reg) // simple single bit shift
}
else if(pow2>=1 &&!signed) {
// just shift multiple bits (unsigned)
val pow2reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2)
code += if(signed)
IRInstruction(Opcode.ASRN, dt, reg1=reg, reg2=pow2reg)
else
IRInstruction(Opcode.LSRN, dt, reg1=reg, reg2=pow2reg)
if(pow2>=0) {
if(signed) {
if(pow2==1) {
// simple single bit shift (signed)
code += IRInstruction(Opcode.ASR, dt, reg1=reg)
} else {
// just shift multiple bits (signed)
val pow2reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2)
code += IRInstruction(Opcode.ASRN, dt, reg1=reg, reg2=pow2reg)
}
} else {
if(pow2==1) {
// simple single bit shift (unsigned)
code += IRInstruction(Opcode.LSR, dt, reg1=reg)
} else {
// just shift multiple bits (unsigned)
val pow2reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1 = pow2reg, immediate = pow2)
code += IRInstruction(Opcode.LSRN, dt, reg1 = reg, reg2 = pow2reg)
}
}
return code
} else {
// regular div
code += if (factor == 0) {
IRInstruction(Opcode.LOAD, dt, reg1=reg, immediate = 0xffff)
} else {
@ -804,8 +816,8 @@ class IRCodeGen(
else
IRInstruction(Opcode.DIV, dt, reg1=reg, immediate = factor)
}
return code
}
return code
}
internal fun divideByConstInplace(dt: IRDataType, knownAddress: Int?, symbol: String?, factor: Int, signed: Boolean): IRCodeChunk {
@ -813,31 +825,47 @@ class IRCodeGen(
if(factor==1)
return code
val pow2 = powersOfTwoInt.indexOf(factor)
// TODO also try to optimize for signed division by powers of 2
if(pow2==1 && !signed) {
// just simple bit shift
code += if(knownAddress!=null)
IRInstruction(Opcode.LSRM, dt, address = knownAddress)
else
IRInstruction(Opcode.LSRM, dt, labelSymbol = symbol)
if(pow2>=0) {
// can do bit shift instead of division
if(signed) {
if(pow2==1) {
// just simple bit shift (signed)
code += if (knownAddress != null)
IRInstruction(Opcode.ASRM, dt, address = knownAddress)
else
IRInstruction(Opcode.ASRM, dt, labelSymbol = symbol)
} else {
// just shift multiple bits (signed)
val pow2reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1 = pow2reg, immediate = pow2)
code += if (knownAddress != null)
IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, address = knownAddress)
else
IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, labelSymbol = symbol)
}
} else {
if(pow2==1) {
// just simple bit shift (unsigned)
code += if(knownAddress!=null)
IRInstruction(Opcode.LSRM, dt, address = knownAddress)
else
IRInstruction(Opcode.LSRM, dt, labelSymbol = symbol)
}
else {
// just shift multiple bits (unsigned)
val pow2reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2)
code += if(knownAddress!=null)
IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, address = knownAddress)
else
IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, labelSymbol = symbol)
}
}
return code
}
else if(pow2>=1 && !signed) {
// just shift multiple bits (unsigned)
val pow2reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1=pow2reg, immediate = pow2)
code += if(signed) {
if(knownAddress!=null)
IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, address = knownAddress)
else
IRInstruction(Opcode.ASRNM, dt, reg1 = pow2reg, labelSymbol = symbol)
}
else {
if(knownAddress!=null)
IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, address = knownAddress)
else
IRInstruction(Opcode.LSRNM, dt, reg1 = pow2reg, labelSymbol = symbol)
}
} else {
else
{
// regular div
if (factor == 0) {
val reg = registers.nextFree()
code += IRInstruction(Opcode.LOAD, dt, reg1=reg, immediate = 0xffff)
@ -862,8 +890,8 @@ class IRCodeGen(
IRInstruction(Opcode.DIVM, dt, reg1 = factorReg, labelSymbol = symbol)
}
}
return code
}
return code
}
private fun translate(ifElse: PtIfElse): IRCodeChunks {

View File

@ -708,6 +708,7 @@ class ExpressionSimplifier(private val program: Program, private val options: Co
return null
val leftDt = leftIDt.getOr(DataType.UNDEFINED)
when (cv) {
0.0 -> return null // fall through to regular float division to properly deal with division by zero
-1.0 -> {
// '/' -> -left
if (expr.operator == "/") {
@ -736,14 +737,10 @@ class ExpressionSimplifier(private val program: Program, private val options: Co
}
}
in powersOfTwoFloat -> {
if (leftDt==DataType.UBYTE || leftDt==DataType.UWORD) {
// Unsigned number divided by a power of two => shift right
// Signed number can't simply be bitshifted in this case (due to rounding issues for negative values),
// so we leave that as is and let the code generator deal with it.
val numshifts = log2(cv).toInt()
val numshifts = powersOfTwoFloat.indexOf(cv)
if (leftDt in IntegerDatatypes) {
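// division by a power of two => shift right (signed and unsigned)
// NOTE(review): for negative signed values an arithmetic shift right rounds toward negative infinity (e.g. -1 >> 1 == -1), which differs from division if that truncates toward zero (-1 / 2 == 0) - confirm this is acceptable for BYTE/WORD operands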
return BinaryExpression(expr.left, ">>", NumericLiteral.optimalInteger(numshifts, expr.position), expr.position)
} else {
println("TODO optimize: divide by power-of-2 $cv at ${expr.position}") // TODO
}
}
}

View File

@ -5,11 +5,7 @@ See open issues on github.
Re-generate the skeletons doc files.
optimize byte/word division by powers of 2 (and shift right?), it's now often still using divmod routine. (also % ?)
see the TODOs in inplacemodificationByteVariableWithLiteralval(), inplacemodificationSomeWordWithLiteralval(), optimizedDivideExpr(),
and finally in optimizeDivision()
and for IR: see divideByConst() / divideByConstInplace() in IRCodeGen
optimize signed word bit shifting?:
1 shift right of AX signed word:
stx P8ZP_SCRATCH_B1
cpx #$80

View File

@ -61,7 +61,7 @@ main {
}
sub unsigned() {
txt.print("unsigned\n")
txt.print("\nunsigned\n")
ubyte @shared ubvalue = 88
uword @shared uwvalue = 8888