added -verafxmul compiler option to use vera fx multiplication routine on cx16

This commit is contained in:
Irmen de Jong 2023-10-01 22:02:07 +02:00
parent 755cc4835e
commit 690782bf60
15 changed files with 200 additions and 59 deletions

View File

@ -21,6 +21,7 @@ class CompilationOptions(val output: OutputType,
var experimentalCodegen: Boolean = false, var experimentalCodegen: Boolean = false,
var varsHighBank: Int? = null, var varsHighBank: Int? = null,
var splitWordArrays: Boolean = false, var splitWordArrays: Boolean = false,
var veraFxMul: Boolean = false,
var outputDir: Path = Path(""), var outputDir: Path = Path(""),
var symbolDefs: Map<String, String> = emptyMap() var symbolDefs: Map<String, String> = emptyMap()
) { ) {

View File

@ -544,11 +544,29 @@ internal class AssignmentAsmGen(private val program: PtProgram,
return true return true
} }
in WordDatatypes -> { in WordDatatypes -> {
if(asmgen.options.veraFxMul) {
if(expr.right.isSimple()) {
asmgen.assignExpressionToRegister(expr.left, RegisterOrPair.R0, expr.left.type in SignedDatatypes)
asmgen.assignExpressionToRegister(expr.right, RegisterOrPair.R1, expr.left.type in SignedDatatypes)
} else {
asmgen.assignExpressionToRegister(expr.left, RegisterOrPair.AY, expr.left.type in SignedDatatypes)
asmgen.saveRegisterStack(CpuRegister.A, false)
asmgen.saveRegisterStack(CpuRegister.Y, false)
asmgen.assignExpressionToRegister(expr.right, RegisterOrPair.R1, expr.left.type in SignedDatatypes)
asmgen.restoreRegisterStack(CpuRegister.Y, false)
asmgen.restoreRegisterStack(CpuRegister.A, false)
asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
}
asmgen.out(" jsr verafx.muls")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier") asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words") asmgen.out(" jsr math.multiply_words")
assignRegisterpairWord(target, RegisterOrPair.AY) assignRegisterpairWord(target, RegisterOrPair.AY)
return true return true
} }
}
else -> return false else -> return false
} }
} else { } else {
@ -568,9 +586,17 @@ internal class AssignmentAsmGen(private val program: PtProgram,
asmgen.out(" jsr math.mul_word_${value}") asmgen.out(" jsr math.mul_word_${value}")
} }
else { else {
if(asmgen.options.veraFxMul) {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r1")
asmgen.out("""
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls""")
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier") asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words") asmgen.out(" jsr math.multiply_words")
} }
}
assignRegisterpairWord(target, RegisterOrPair.AY) assignRegisterpairWord(target, RegisterOrPair.AY)
return true return true
} }

View File

@ -1341,6 +1341,20 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
// the mul code works for both signed and unsigned // the mul code works for both signed and unsigned
if(value in asmgen.optimizedWordMultiplications) { if(value in asmgen.optimizedWordMultiplications) {
asmgen.out(" lda $name | ldy $name+1 | jsr math.mul_word_$value | sta $name | sty $name+1") asmgen.out(" lda $name | ldy $name+1 | jsr math.mul_word_$value | sta $name | sty $name+1")
} else {
if(asmgen.options.veraFxMul) {
asmgen.out("""
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
lda #<$value
ldy #>$value
sta cx16.r1
sty cx16.r1+1
jsr verafx.muls
sta $name
sty $name+1""")
} else { } else {
asmgen.out(""" asmgen.out("""
lda $name lda $name
@ -1354,6 +1368,7 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sty $name+1""") sty $name+1""")
} }
} }
}
"/" -> { "/" -> {
if(value==0) if(value==0)
throw AssemblyError("division by zero") throw AssemblyError("division by zero")
@ -1794,6 +1809,27 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sta $name+1""") sta $name+1""")
} }
"*" -> { "*" -> {
if(asmgen.options.veraFxMul) {
if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta cx16.r1")
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz cx16.r1+1")
else
asmgen.out(" lda #0 | sta cx16.r1+1")
} else {
asmgen.out(" lda $otherName")
asmgen.signExtendAYlsb(valueDt)
asmgen.out(" sta cx16.r1 | sty cx16.r1+1")
}
asmgen.out("""
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
if(valueDt==DataType.UBYTE) { if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier") asmgen.out(" lda $otherName | sta math.multiply_words.multiplier")
if(asmgen.isTargetCpu(CpuType.CPU65c02)) if(asmgen.isTargetCpu(CpuType.CPU65c02))
@ -1812,6 +1848,7 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sta $name sta $name
sty $name+1""") sty $name+1""")
} }
}
"/" -> { "/" -> {
if(dt==DataType.UWORD) { if(dt==DataType.UWORD) {
asmgen.out(""" asmgen.out("""
@ -1939,6 +1976,20 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1") "+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1") "-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
"*" -> { "*" -> {
if(asmgen.options.veraFxMul) {
asmgen.out("""
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
lda $otherName
ldy $otherName+1
sta cx16.r1
sty cx16.r1+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
asmgen.out(""" asmgen.out("""
lda $otherName lda $otherName
ldy $otherName+1 ldy $otherName+1
@ -1950,6 +2001,7 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sta $name sta $name
sty $name+1""") sty $name+1""")
} }
}
"/" -> { "/" -> {
if(dt==DataType.WORD) { if(dt==DataType.WORD) {
asmgen.out(""" asmgen.out("""
@ -2128,6 +2180,19 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) { private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
fun multiplyVarByWordInAY() { fun multiplyVarByWordInAY() {
if(asmgen.options.veraFxMul) {
asmgen.out("""
sta cx16.r1
sty cx16.r1+1
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls
sta $name
sty $name+1
""")
} else {
asmgen.out(""" asmgen.out("""
sta math.multiply_words.multiplier sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1 sty math.multiply_words.multiplier+1
@ -2138,6 +2203,7 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sty $name+1 sty $name+1
""") """)
} }
}
fun divideVarByWordInAY() { fun divideVarByWordInAY() {
asmgen.out(""" asmgen.out("""

View File

@ -4,6 +4,7 @@
; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit ; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit
verafx { verafx {
%option no_symbol_prefixing
; unsigned multiplication just passes the values as signed to muls ; unsigned multiplication just passes the values as signed to muls
; if you do this yourself in your call to muls, it will save a few instructions. ; if you do this yourself in your call to muls, it will save a few instructions.

View File

@ -50,6 +50,7 @@ private fun compileMain(args: Array<String>): Boolean {
val includeSourcelines by cli.option(ArgType.Boolean, fullName = "sourcelines", description = "include original Prog8 source lines in generated asm code") val includeSourcelines by cli.option(ArgType.Boolean, fullName = "sourcelines", description = "include original Prog8 source lines in generated asm code")
val splitWordArrays by cli.option(ArgType.Boolean, fullName = "splitarrays", description = "treat all word arrays as tagged with @split to make them lsb/msb split in memory") val splitWordArrays by cli.option(ArgType.Boolean, fullName = "splitarrays", description = "treat all word arrays as tagged with @split to make them lsb/msb split in memory")
val compilationTarget by cli.option(ArgType.String, fullName = "target", description = "target output of the compiler (one of '${C64Target.NAME}', '${C128Target.NAME}', '${Cx16Target.NAME}', '${AtariTarget.NAME}', '${PETTarget.NAME}', '${VMTarget.NAME}') (required)") val compilationTarget by cli.option(ArgType.String, fullName = "target", description = "target output of the compiler (one of '${C64Target.NAME}', '${C128Target.NAME}', '${Cx16Target.NAME}', '${AtariTarget.NAME}', '${PETTarget.NAME}', '${VMTarget.NAME}') (required)")
val veraFxMul by cli.option(ArgType.Boolean, fullName = "verafxmul", description = "use Vera Fx hardware assisted word multiplications (cx16 target only)")
val startVm by cli.option(ArgType.Boolean, fullName = "vm", description = "load and run a .p8ir IR source file in the VM") val startVm by cli.option(ArgType.Boolean, fullName = "vm", description = "load and run a .p8ir IR source file in the VM")
val watchMode by cli.option(ArgType.Boolean, fullName = "watch", description = "continuous compilation mode (watch for file changes)") val watchMode by cli.option(ArgType.Boolean, fullName = "watch", description = "continuous compilation mode (watch for file changes)")
val varsHighBank by cli.option(ArgType.Int, fullName = "varshigh", description = "put uninitialized variables in high memory area instead of at the end of the program. On the cx16 target the value specifies the HiRAM bank to use, on other systems this value is ignored.") val varsHighBank by cli.option(ArgType.Int, fullName = "varshigh", description = "put uninitialized variables in high memory area instead of at the end of the program. On the cx16 target the value specifies the HiRAM bank to use, on other systems this value is ignored.")
@ -95,6 +96,11 @@ private fun compileMain(args: Array<String>): Boolean {
return false return false
} }
if(veraFxMul==true && compilationTarget!=Cx16Target.NAME) {
System.err.println("Vera Fx word multiplications are only available on the Commander X16.")
return false
}
if(startVm==true) { if(startVm==true) {
return runVm(moduleFiles.first()) return runVm(moduleFiles.first())
} }
@ -122,6 +128,7 @@ private fun compileMain(args: Array<String>): Boolean {
varsHighBank, varsHighBank,
compilationTarget!!, compilationTarget!!,
splitWordArrays == true, splitWordArrays == true,
veraFxMul == true,
processedSymbols, processedSymbols,
srcdirs, srcdirs,
outputPath outputPath
@ -190,6 +197,7 @@ private fun compileMain(args: Array<String>): Boolean {
varsHighBank, varsHighBank,
compilationTarget!!, compilationTarget!!,
splitWordArrays == true, splitWordArrays == true,
veraFxMul == true,
processedSymbols, processedSymbols,
srcdirs, srcdirs,
outputPath outputPath

View File

@ -38,6 +38,7 @@ class CompilerArguments(val filepath: Path,
val varsHighBank: Int?, val varsHighBank: Int?,
val compilationTarget: String, val compilationTarget: String,
val splitWordArrays: Boolean, val splitWordArrays: Boolean,
val veraFxMul: Boolean,
val symbolDefs: Map<String, String>, val symbolDefs: Map<String, String>,
val sourceDirs: List<String> = emptyList(), val sourceDirs: List<String> = emptyList(),
val outputDir: Path = Path(""), val outputDir: Path = Path(""),
@ -64,7 +65,7 @@ fun compileProgram(args: CompilerArguments): CompilationResult? {
try { try {
val totalTime = measureTimeMillis { val totalTime = measureTimeMillis {
val (programresult, options, imported) = parseMainModule(args.filepath, args.errors, compTarget, args.sourceDirs) val (programresult, options, imported) = parseMainModule(args.filepath, args.errors, compTarget, args.veraFxMul, args.sourceDirs)
compilationOptions = options compilationOptions = options
with(compilationOptions) { with(compilationOptions) {
@ -76,6 +77,7 @@ fun compileProgram(args: CompilerArguments): CompilationResult? {
experimentalCodegen = args.experimentalCodegen experimentalCodegen = args.experimentalCodegen
varsHighBank = args.varsHighBank varsHighBank = args.varsHighBank
splitWordArrays = args.splitWordArrays splitWordArrays = args.splitWordArrays
veraFxMul = args.veraFxMul
outputDir = args.outputDir.normalize() outputDir = args.outputDir.normalize()
symbolDefs = args.symbolDefs symbolDefs = args.symbolDefs
} }
@ -232,6 +234,7 @@ private class BuiltinFunctionsFacade(functions: Map<String, FSignature>): IBuilt
fun parseMainModule(filepath: Path, fun parseMainModule(filepath: Path,
errors: IErrorReporter, errors: IErrorReporter,
compTarget: ICompilationTarget, compTarget: ICompilationTarget,
veraFxMul: Boolean,
sourceDirs: List<String>): Triple<Program, CompilationOptions, List<Path>> { sourceDirs: List<String>): Triple<Program, CompilationOptions, List<Path>> {
val bf = BuiltinFunctionsFacade(BuiltinFunctions) val bf = BuiltinFunctionsFacade(BuiltinFunctions)
val program = Program(filepath.nameWithoutExtension, bf, compTarget, compTarget) val program = Program(filepath.nameWithoutExtension, bf, compTarget, compTarget)
@ -250,9 +253,10 @@ fun parseMainModule(filepath: Path,
for(lib in compTarget.machine.importLibs(compilerOptions, compTarget.name)) for(lib in compTarget.machine.importLibs(compilerOptions, compTarget.name))
importer.importImplicitLibraryModule(lib) importer.importImplicitLibraryModule(lib)
if(compilerOptions.compTarget.name!=VMTarget.NAME && !compilerOptions.experimentalCodegen) { if(compilerOptions.compTarget.name!=VMTarget.NAME && !compilerOptions.experimentalCodegen)
importer.importImplicitLibraryModule("math") importer.importImplicitLibraryModule("math")
} if(veraFxMul)
importer.importImplicitLibraryModule("verafx")
importer.importImplicitLibraryModule("prog8_lib") importer.importImplicitLibraryModule("prog8_lib")
if (compilerOptions.launcher == CbmPrgLauncherType.BASIC && compilerOptions.output != OutputType.PRG) if (compilerOptions.launcher == CbmPrgLauncherType.BASIC && compilerOptions.output != OutputType.PRG)

View File

@ -35,6 +35,7 @@ private fun compileTheThing(filepath: Path, optimize: Boolean, target: ICompilat
varsHighBank = null, varsHighBank = null,
compilationTarget = target.name, compilationTarget = target.name,
splitWordArrays = false, splitWordArrays = false,
veraFxMul = false,
symbolDefs = emptyMap(), symbolDefs = emptyMap(),
outputDir = outputDir outputDir = outputDir
) )

View File

@ -52,6 +52,7 @@ class TestCompilerOptionSourcedirs: FunSpec({
varsHighBank = null, varsHighBank = null,
compilationTarget = Cx16Target.NAME, compilationTarget = Cx16Target.NAME,
splitWordArrays = false, splitWordArrays = false,
veraFxMul = false,
symbolDefs = emptyMap(), symbolDefs = emptyMap(),
sourceDirs, sourceDirs,
outputDir outputDir

View File

@ -87,7 +87,7 @@ main {
val filenameBase = "on_the_fly_test_" + sourceText.hashCode().toUInt().toString(16) val filenameBase = "on_the_fly_test_" + sourceText.hashCode().toUInt().toString(16)
val filepath = outputDir.resolve("$filenameBase.p8") val filepath = outputDir.resolve("$filenameBase.p8")
filepath.toFile().writeText(sourceText) filepath.toFile().writeText(sourceText)
val (program, options, importedfiles) = parseMainModule(filepath, errors, C64Target(), emptyList()) val (program, options, importedfiles) = parseMainModule(filepath, errors, C64Target(), false, emptyList())
program.toplevelModule.name shouldBe filenameBase program.toplevelModule.name shouldBe filenameBase
withClue("all imports other than the test source must have been internal resources library files") { withClue("all imports other than the test source must have been internal resources library files") {

View File

@ -34,7 +34,8 @@ internal fun compileFile(
symbolDefs = emptyMap(), symbolDefs = emptyMap(),
outputDir = outputDir, outputDir = outputDir,
errors = errors ?: ErrorReporterForTests(), errors = errors ?: ErrorReporterForTests(),
splitWordArrays = false splitWordArrays = false,
veraFxMul = false
) )
return compileProgram(args) return compileProgram(args)
} }

View File

@ -179,6 +179,14 @@ One or more .p8 module files
This removes the need to add @split yourself but some programs may fail to compile with This removes the need to add @split yourself but some programs may fail to compile with
this option as not all array operations are implemented yet on split arrays. this option as not all array operations are implemented yet on split arrays.
``-verafxmul``
Use Vera Fx hardware-assisted word multiplication routines (only on the Commander X16).
These are expected to be several times faster than the regular cpu routine,
but could interfere with regular Vera code. They also use the 4 bytes at the top of video ram
just before the Vera PSG registers.
Use this option only if you know what you're doing; otherwise, calling ``verafx.muls()``
manually gives you more control over where this Vera routine is used.
``-vm`` ``-vm``
load and run a p8-virt or p8-ir listing in the internal VirtualMachine instead of compiling a prog8 program file. load and run a p8-virt or p8-ir listing in the internal VirtualMachine instead of compiling a prog8 program file.

View File

@ -73,6 +73,7 @@ Language features
- High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting. - High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting.
- Programs can be run multiple times without reloading because of automatic variable (re)initializations. - Programs can be run multiple times without reloading because of automatic variable (re)initializations.
- Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines. - Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines.
- Support for low-level system features such as (optional) transparent use of the Vera Fx hardware-assisted word multiplication on the Commander X16.
- If you only use standard Kernal and core prog8 library routines, it is sometimes possible to compile the *exact same program* for different machines (just change the compilation target flag) - If you only use standard Kernal and core prog8 library routines, it is sometimes possible to compile the *exact same program* for different machines (just change the compilation target flag)

View File

@ -1,8 +1,7 @@
TODO TODO
==== ====
- why is petscii \n translated to $8d and not $0d? and \r IS translated to $0d? - add a compiler switch to replace all calls to the math word mul routine on the X16 by the verafx call instead. Search TODO("vera fx mul")
- add a compiler switch to replace all calls to the math word mul routine on the X16 by the verafx call instead.
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
- [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction - [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
- IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified! - IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified!

View File

@ -7,10 +7,33 @@
main { main {
sub start() { sub start() {
txt.print("petscii \\r=") word w1 = -123
txt.print_ub('\r') word w2 = 222
txt.print(" and \\n=") ubyte b2 = 222
txt.print_ub('\n') byte sb2 = 111
txt.print_w(w1*w2)
txt.nl()
txt.print_w(w1*222)
txt.nl()
w1 = -123
w1 *= 222
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= w2
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= (w2-1)
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= b2
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= sb2
txt.print_w(w1)
txt.nl() txt.nl()
; txt.print_uw(math.mul16_last_upper()) ; txt.print_uw(math.mul16_last_upper())

View File

@ -42,6 +42,7 @@ class RequestParser : Take {
asmListfile = false, asmListfile = false,
experimentalCodegen = false, experimentalCodegen = false,
splitWordArrays = false, splitWordArrays = false,
veraFxMul = false,
varsHighBank = null, varsHighBank = null,
) )
compileProgram(args) compileProgram(args)