added -verafxmul compiler option to use vera fx multiplication routine on cx16

This commit is contained in:
Irmen de Jong 2023-10-01 22:02:07 +02:00
parent 755cc4835e
commit 690782bf60
15 changed files with 200 additions and 59 deletions

View File

@ -21,6 +21,7 @@ class CompilationOptions(val output: OutputType,
var experimentalCodegen: Boolean = false, var experimentalCodegen: Boolean = false,
var varsHighBank: Int? = null, var varsHighBank: Int? = null,
var splitWordArrays: Boolean = false, var splitWordArrays: Boolean = false,
var veraFxMul: Boolean = false,
var outputDir: Path = Path(""), var outputDir: Path = Path(""),
var symbolDefs: Map<String, String> = emptyMap() var symbolDefs: Map<String, String> = emptyMap()
) { ) {

View File

@ -544,10 +544,28 @@ internal class AssignmentAsmGen(private val program: PtProgram,
return true return true
} }
in WordDatatypes -> { in WordDatatypes -> {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier") if(asmgen.options.veraFxMul) {
asmgen.out(" jsr math.multiply_words") if(expr.right.isSimple()) {
assignRegisterpairWord(target, RegisterOrPair.AY) asmgen.assignExpressionToRegister(expr.left, RegisterOrPair.R0, expr.left.type in SignedDatatypes)
return true asmgen.assignExpressionToRegister(expr.right, RegisterOrPair.R1, expr.left.type in SignedDatatypes)
} else {
asmgen.assignExpressionToRegister(expr.left, RegisterOrPair.AY, expr.left.type in SignedDatatypes)
asmgen.saveRegisterStack(CpuRegister.A, false)
asmgen.saveRegisterStack(CpuRegister.Y, false)
asmgen.assignExpressionToRegister(expr.right, RegisterOrPair.R1, expr.left.type in SignedDatatypes)
asmgen.restoreRegisterStack(CpuRegister.Y, false)
asmgen.restoreRegisterStack(CpuRegister.A, false)
asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
}
asmgen.out(" jsr verafx.muls")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
}
} }
else -> return false else -> return false
} }
@ -568,8 +586,16 @@ internal class AssignmentAsmGen(private val program: PtProgram,
asmgen.out(" jsr math.mul_word_${value}") asmgen.out(" jsr math.mul_word_${value}")
} }
else { else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier") if(asmgen.options.veraFxMul) {
asmgen.out(" jsr math.multiply_words") asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r1")
asmgen.out("""
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls""")
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
}
} }
assignRegisterpairWord(target, RegisterOrPair.AY) assignRegisterpairWord(target, RegisterOrPair.AY)
return true return true

View File

@ -1342,16 +1342,31 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
if(value in asmgen.optimizedWordMultiplications) { if(value in asmgen.optimizedWordMultiplications) {
asmgen.out(" lda $name | ldy $name+1 | jsr math.mul_word_$value | sta $name | sty $name+1") asmgen.out(" lda $name | ldy $name+1 | jsr math.mul_word_$value | sta $name | sty $name+1")
} else { } else {
asmgen.out(""" if(asmgen.options.veraFxMul) {
lda $name asmgen.out("""
sta math.multiply_words.multiplier lda $name
lda $name+1 ldy $name+1
sta math.multiply_words.multiplier+1 sta cx16.r0
lda #<$value sty cx16.r0+1
ldy #>$value lda #<$value
jsr math.multiply_words ldy #>$value
sta $name sta cx16.r1
sty $name+1""") sty cx16.r1+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
asmgen.out("""
lda $name
sta math.multiply_words.multiplier
lda $name+1
sta math.multiply_words.multiplier+1
lda #<$value
ldy #>$value
jsr math.multiply_words
sta $name
sty $name+1""")
}
} }
} }
"/" -> { "/" -> {
@ -1794,23 +1809,45 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sta $name+1""") sta $name+1""")
} }
"*" -> { "*" -> {
if(valueDt==DataType.UBYTE) { if(asmgen.options.veraFxMul) {
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier") if(valueDt==DataType.UBYTE) {
if(asmgen.isTargetCpu(CpuType.CPU65c02)) asmgen.out(" lda $otherName | sta cx16.r1")
asmgen.out(" stz math.multiply_words.multiplier+1") if(asmgen.isTargetCpu(CpuType.CPU65c02))
else asmgen.out(" stz cx16.r1+1")
asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1") else
} else { asmgen.out(" lda #0 | sta cx16.r1+1")
asmgen.out(" lda $otherName") } else {
asmgen.signExtendAYlsb(valueDt) asmgen.out(" lda $otherName")
asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1") asmgen.signExtendAYlsb(valueDt)
} asmgen.out(" sta cx16.r1 | sty cx16.r1+1")
asmgen.out(""" }
asmgen.out("""
lda $name lda $name
ldy $name+1 ldy $name+1
jsr math.multiply_words sta cx16.r0
sty cx16.r0+1
jsr verafx.muls
sta $name sta $name
sty $name+1""") sty $name+1""")
} else {
if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier")
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz math.multiply_words.multiplier+1")
else
asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1")
} else {
asmgen.out(" lda $otherName")
asmgen.signExtendAYlsb(valueDt)
asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1")
}
asmgen.out("""
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
}
} }
"/" -> { "/" -> {
if(dt==DataType.UWORD) { if(dt==DataType.UWORD) {
@ -1939,16 +1976,31 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1") "+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1") "-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
"*" -> { "*" -> {
asmgen.out(""" if(asmgen.options.veraFxMul) {
lda $otherName asmgen.out("""
ldy $otherName+1 lda $name
sta math.multiply_words.multiplier ldy $name+1
sty math.multiply_words.multiplier+1 sta cx16.r0
lda $name sty cx16.r0+1
ldy $name+1 lda $otherName
jsr math.multiply_words ldy $otherName+1
sta $name sta cx16.r1
sty $name+1""") sty cx16.r1+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
asmgen.out("""
lda $otherName
ldy $otherName+1
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
}
} }
"/" -> { "/" -> {
if(dt==DataType.WORD) { if(dt==DataType.WORD) {
@ -2128,15 +2180,29 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) { private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
fun multiplyVarByWordInAY() { fun multiplyVarByWordInAY() {
asmgen.out(""" if(asmgen.options.veraFxMul) {
sta math.multiply_words.multiplier asmgen.out("""
sty math.multiply_words.multiplier+1 sta cx16.r1
lda $name sty cx16.r1+1
ldy $name+1 lda $name
jsr math.multiply_words ldy $name+1
sta $name sta cx16.r0
sty $name+1 sty cx16.r0+1
""") jsr verafx.muls
sta $name
sty $name+1
""")
} else {
asmgen.out("""
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1
""")
}
} }
fun divideVarByWordInAY() { fun divideVarByWordInAY() {

View File

@ -4,6 +4,7 @@
; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit ; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit
verafx { verafx {
%option no_symbol_prefixing
; unsigned multiplication just passes the values as signed to muls ; unsigned multiplication just passes the values as signed to muls
; if you do this yourself in your call to muls, it will save a few instructions. ; if you do this yourself in your call to muls, it will save a few instructions.

View File

@ -50,6 +50,7 @@ private fun compileMain(args: Array<String>): Boolean {
val includeSourcelines by cli.option(ArgType.Boolean, fullName = "sourcelines", description = "include original Prog8 source lines in generated asm code") val includeSourcelines by cli.option(ArgType.Boolean, fullName = "sourcelines", description = "include original Prog8 source lines in generated asm code")
val splitWordArrays by cli.option(ArgType.Boolean, fullName = "splitarrays", description = "treat all word arrays as tagged with @split to make them lsb/msb split in memory") val splitWordArrays by cli.option(ArgType.Boolean, fullName = "splitarrays", description = "treat all word arrays as tagged with @split to make them lsb/msb split in memory")
val compilationTarget by cli.option(ArgType.String, fullName = "target", description = "target output of the compiler (one of '${C64Target.NAME}', '${C128Target.NAME}', '${Cx16Target.NAME}', '${AtariTarget.NAME}', '${PETTarget.NAME}', '${VMTarget.NAME}') (required)") val compilationTarget by cli.option(ArgType.String, fullName = "target", description = "target output of the compiler (one of '${C64Target.NAME}', '${C128Target.NAME}', '${Cx16Target.NAME}', '${AtariTarget.NAME}', '${PETTarget.NAME}', '${VMTarget.NAME}') (required)")
val veraFxMul by cli.option(ArgType.Boolean, fullName = "verafxmul", description = "use Vera Fx hardware assisted word multiplications (cx16 target only)")
val startVm by cli.option(ArgType.Boolean, fullName = "vm", description = "load and run a .p8ir IR source file in the VM") val startVm by cli.option(ArgType.Boolean, fullName = "vm", description = "load and run a .p8ir IR source file in the VM")
val watchMode by cli.option(ArgType.Boolean, fullName = "watch", description = "continuous compilation mode (watch for file changes)") val watchMode by cli.option(ArgType.Boolean, fullName = "watch", description = "continuous compilation mode (watch for file changes)")
val varsHighBank by cli.option(ArgType.Int, fullName = "varshigh", description = "put uninitialized variables in high memory area instead of at the end of the program. On the cx16 target the value specifies the HiRAM bank to use, on other systems this value is ignored.") val varsHighBank by cli.option(ArgType.Int, fullName = "varshigh", description = "put uninitialized variables in high memory area instead of at the end of the program. On the cx16 target the value specifies the HiRAM bank to use, on other systems this value is ignored.")
@ -95,6 +96,11 @@ private fun compileMain(args: Array<String>): Boolean {
return false return false
} }
if(veraFxMul==true && compilationTarget!=Cx16Target.NAME) {
System.err.println("Vera Fx word multiplications are only available on the Commander X16.")
return false
}
if(startVm==true) { if(startVm==true) {
return runVm(moduleFiles.first()) return runVm(moduleFiles.first())
} }
@ -122,6 +128,7 @@ private fun compileMain(args: Array<String>): Boolean {
varsHighBank, varsHighBank,
compilationTarget!!, compilationTarget!!,
splitWordArrays == true, splitWordArrays == true,
veraFxMul == true,
processedSymbols, processedSymbols,
srcdirs, srcdirs,
outputPath outputPath
@ -190,6 +197,7 @@ private fun compileMain(args: Array<String>): Boolean {
varsHighBank, varsHighBank,
compilationTarget!!, compilationTarget!!,
splitWordArrays == true, splitWordArrays == true,
veraFxMul == true,
processedSymbols, processedSymbols,
srcdirs, srcdirs,
outputPath outputPath

View File

@ -38,6 +38,7 @@ class CompilerArguments(val filepath: Path,
val varsHighBank: Int?, val varsHighBank: Int?,
val compilationTarget: String, val compilationTarget: String,
val splitWordArrays: Boolean, val splitWordArrays: Boolean,
val veraFxMul: Boolean,
val symbolDefs: Map<String, String>, val symbolDefs: Map<String, String>,
val sourceDirs: List<String> = emptyList(), val sourceDirs: List<String> = emptyList(),
val outputDir: Path = Path(""), val outputDir: Path = Path(""),
@ -64,7 +65,7 @@ fun compileProgram(args: CompilerArguments): CompilationResult? {
try { try {
val totalTime = measureTimeMillis { val totalTime = measureTimeMillis {
val (programresult, options, imported) = parseMainModule(args.filepath, args.errors, compTarget, args.sourceDirs) val (programresult, options, imported) = parseMainModule(args.filepath, args.errors, compTarget, args.veraFxMul, args.sourceDirs)
compilationOptions = options compilationOptions = options
with(compilationOptions) { with(compilationOptions) {
@ -76,6 +77,7 @@ fun compileProgram(args: CompilerArguments): CompilationResult? {
experimentalCodegen = args.experimentalCodegen experimentalCodegen = args.experimentalCodegen
varsHighBank = args.varsHighBank varsHighBank = args.varsHighBank
splitWordArrays = args.splitWordArrays splitWordArrays = args.splitWordArrays
veraFxMul = args.veraFxMul
outputDir = args.outputDir.normalize() outputDir = args.outputDir.normalize()
symbolDefs = args.symbolDefs symbolDefs = args.symbolDefs
} }
@ -232,6 +234,7 @@ private class BuiltinFunctionsFacade(functions: Map<String, FSignature>): IBuilt
fun parseMainModule(filepath: Path, fun parseMainModule(filepath: Path,
errors: IErrorReporter, errors: IErrorReporter,
compTarget: ICompilationTarget, compTarget: ICompilationTarget,
veraFxMul: Boolean,
sourceDirs: List<String>): Triple<Program, CompilationOptions, List<Path>> { sourceDirs: List<String>): Triple<Program, CompilationOptions, List<Path>> {
val bf = BuiltinFunctionsFacade(BuiltinFunctions) val bf = BuiltinFunctionsFacade(BuiltinFunctions)
val program = Program(filepath.nameWithoutExtension, bf, compTarget, compTarget) val program = Program(filepath.nameWithoutExtension, bf, compTarget, compTarget)
@ -250,9 +253,10 @@ fun parseMainModule(filepath: Path,
for(lib in compTarget.machine.importLibs(compilerOptions, compTarget.name)) for(lib in compTarget.machine.importLibs(compilerOptions, compTarget.name))
importer.importImplicitLibraryModule(lib) importer.importImplicitLibraryModule(lib)
if(compilerOptions.compTarget.name!=VMTarget.NAME && !compilerOptions.experimentalCodegen) { if(compilerOptions.compTarget.name!=VMTarget.NAME && !compilerOptions.experimentalCodegen)
importer.importImplicitLibraryModule("math") importer.importImplicitLibraryModule("math")
} if(veraFxMul)
importer.importImplicitLibraryModule("verafx")
importer.importImplicitLibraryModule("prog8_lib") importer.importImplicitLibraryModule("prog8_lib")
if (compilerOptions.launcher == CbmPrgLauncherType.BASIC && compilerOptions.output != OutputType.PRG) if (compilerOptions.launcher == CbmPrgLauncherType.BASIC && compilerOptions.output != OutputType.PRG)

View File

@ -35,6 +35,7 @@ private fun compileTheThing(filepath: Path, optimize: Boolean, target: ICompilat
varsHighBank = null, varsHighBank = null,
compilationTarget = target.name, compilationTarget = target.name,
splitWordArrays = false, splitWordArrays = false,
veraFxMul = false,
symbolDefs = emptyMap(), symbolDefs = emptyMap(),
outputDir = outputDir outputDir = outputDir
) )

View File

@ -52,6 +52,7 @@ class TestCompilerOptionSourcedirs: FunSpec({
varsHighBank = null, varsHighBank = null,
compilationTarget = Cx16Target.NAME, compilationTarget = Cx16Target.NAME,
splitWordArrays = false, splitWordArrays = false,
veraFxMul = false,
symbolDefs = emptyMap(), symbolDefs = emptyMap(),
sourceDirs, sourceDirs,
outputDir outputDir

View File

@ -87,7 +87,7 @@ main {
val filenameBase = "on_the_fly_test_" + sourceText.hashCode().toUInt().toString(16) val filenameBase = "on_the_fly_test_" + sourceText.hashCode().toUInt().toString(16)
val filepath = outputDir.resolve("$filenameBase.p8") val filepath = outputDir.resolve("$filenameBase.p8")
filepath.toFile().writeText(sourceText) filepath.toFile().writeText(sourceText)
val (program, options, importedfiles) = parseMainModule(filepath, errors, C64Target(), emptyList()) val (program, options, importedfiles) = parseMainModule(filepath, errors, C64Target(), false, emptyList())
program.toplevelModule.name shouldBe filenameBase program.toplevelModule.name shouldBe filenameBase
withClue("all imports other than the test source must have been internal resources library files") { withClue("all imports other than the test source must have been internal resources library files") {

View File

@ -34,7 +34,8 @@ internal fun compileFile(
symbolDefs = emptyMap(), symbolDefs = emptyMap(),
outputDir = outputDir, outputDir = outputDir,
errors = errors ?: ErrorReporterForTests(), errors = errors ?: ErrorReporterForTests(),
splitWordArrays = false splitWordArrays = false,
veraFxMul = false
) )
return compileProgram(args) return compileProgram(args)
} }

View File

@ -179,6 +179,14 @@ One or more .p8 module files
This removes the need to add @split yourself but some programs may fail to compile with This removes the need to add @split yourself but some programs may fail to compile with
this option as not all array operations are implemented yet on split arrays. this option as not all array operations are implemented yet on split arrays.
``-verafxmul``
Use Vera Fx hardware-assisted word multiplication routines (only on the Commander X16).
These are expected to be several times faster than the regular CPU routine,
but could interfere with regular Vera code. They also use the 4 bytes at the top of video RAM,
just before the Vera PSG registers.
Use this option only if you know what you're doing; otherwise, calling ``verafx.muls()``
manually gives you more control over where this Vera routine is used.
``-vm`` ``-vm``
load and run a p8-virt or p8-ir listing in the internal VirtualMachine instead of compiling a prog8 program file.

View File

@ -73,6 +73,7 @@ Language features
- High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting. - High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting.
- Programs can be run multiple times without reloading because of automatic variable (re)initializations. - Programs can be run multiple times without reloading because of automatic variable (re)initializations.
- Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines. - Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines.
- Support for low-level system features, such as (optional) transparent use of the Vera Fx hardware-assisted word multiplication on the Commander X16
- If you only use standard Kernal and core prog8 library routines, it is sometimes possible to compile the *exact same program* for different machines (just change the compilation target flag) - If you only use standard Kernal and core prog8 library routines, it is sometimes possible to compile the *exact same program* for different machines (just change the compilation target flag)

View File

@ -1,8 +1,7 @@
TODO TODO
==== ====
- why is petscii \n translated to $8d and not $0d? and \r IS translated to $0d? - add a compiler switch to replace all calls to the math word mul routine on the X16 by the verafx call instead. Search TODO("vera fx mul")
- add a compiler switch to replace all calls to the math word mul routine on the X16 by the verafx call instead.
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
- [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction - [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
- IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified! - IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified!

View File

@ -7,10 +7,33 @@
main { main {
sub start() { sub start() {
txt.print("petscii \\r=") word w1 = -123
txt.print_ub('\r') word w2 = 222
txt.print(" and \\n=") ubyte b2 = 222
txt.print_ub('\n') byte sb2 = 111
txt.print_w(w1*w2)
txt.nl()
txt.print_w(w1*222)
txt.nl()
w1 = -123
w1 *= 222
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= w2
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= (w2-1)
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= b2
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= sb2
txt.print_w(w1)
txt.nl() txt.nl()
; txt.print_uw(math.mul16_last_upper()) ; txt.print_uw(math.mul16_last_upper())

View File

@ -42,6 +42,7 @@ class RequestParser : Take {
asmListfile = false, asmListfile = false,
experimentalCodegen = false, experimentalCodegen = false,
splitWordArrays = false, splitWordArrays = false,
veraFxMul = false,
varsHighBank = null, varsHighBank = null,
) )
compileProgram(args) compileProgram(args)