added -verafxmul compiler option to use vera fx multiplication routine on cx16

This commit is contained in:
Irmen de Jong 2023-10-01 22:02:07 +02:00
parent 755cc4835e
commit 690782bf60
15 changed files with 200 additions and 59 deletions

View File

@ -21,6 +21,7 @@ class CompilationOptions(val output: OutputType,
var experimentalCodegen: Boolean = false,
var varsHighBank: Int? = null,
var splitWordArrays: Boolean = false,
var veraFxMul: Boolean = false,
var outputDir: Path = Path(""),
var symbolDefs: Map<String, String> = emptyMap()
) {

View File

@ -544,10 +544,28 @@ internal class AssignmentAsmGen(private val program: PtProgram,
return true
}
in WordDatatypes -> {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
if(asmgen.options.veraFxMul) {
if(expr.right.isSimple()) {
asmgen.assignExpressionToRegister(expr.left, RegisterOrPair.R0, expr.left.type in SignedDatatypes)
asmgen.assignExpressionToRegister(expr.right, RegisterOrPair.R1, expr.left.type in SignedDatatypes)
} else {
asmgen.assignExpressionToRegister(expr.left, RegisterOrPair.AY, expr.left.type in SignedDatatypes)
asmgen.saveRegisterStack(CpuRegister.A, false)
asmgen.saveRegisterStack(CpuRegister.Y, false)
asmgen.assignExpressionToRegister(expr.right, RegisterOrPair.R1, expr.left.type in SignedDatatypes)
asmgen.restoreRegisterStack(CpuRegister.Y, false)
asmgen.restoreRegisterStack(CpuRegister.A, false)
asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
}
asmgen.out(" jsr verafx.muls")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
}
}
else -> return false
}
@ -568,8 +586,16 @@ internal class AssignmentAsmGen(private val program: PtProgram,
asmgen.out(" jsr math.mul_word_${value}")
}
else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
if(asmgen.options.veraFxMul) {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r1")
asmgen.out("""
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls""")
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
}
}
assignRegisterpairWord(target, RegisterOrPair.AY)
return true

View File

@ -1342,16 +1342,31 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
if(value in asmgen.optimizedWordMultiplications) {
asmgen.out(" lda $name | ldy $name+1 | jsr math.mul_word_$value | sta $name | sty $name+1")
} else {
asmgen.out("""
lda $name
sta math.multiply_words.multiplier
lda $name+1
sta math.multiply_words.multiplier+1
lda #<$value
ldy #>$value
jsr math.multiply_words
sta $name
sty $name+1""")
if(asmgen.options.veraFxMul) {
asmgen.out("""
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
lda #<$value
ldy #>$value
sta cx16.r1
sty cx16.r1+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
asmgen.out("""
lda $name
sta math.multiply_words.multiplier
lda $name+1
sta math.multiply_words.multiplier+1
lda #<$value
ldy #>$value
jsr math.multiply_words
sta $name
sty $name+1""")
}
}
}
"/" -> {
@ -1794,23 +1809,45 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
sta $name+1""")
}
"*" -> {
if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier")
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz math.multiply_words.multiplier+1")
else
asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1")
} else {
asmgen.out(" lda $otherName")
asmgen.signExtendAYlsb(valueDt)
asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1")
}
asmgen.out("""
if(asmgen.options.veraFxMul) {
if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta cx16.r1")
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz cx16.r1+1")
else
asmgen.out(" lda #0 | sta cx16.r1+1")
} else {
asmgen.out(" lda $otherName")
asmgen.signExtendAYlsb(valueDt)
asmgen.out(" sta cx16.r1 | sty cx16.r1+1")
}
asmgen.out("""
lda $name
ldy $name+1
jsr math.multiply_words
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier")
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz math.multiply_words.multiplier+1")
else
asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1")
} else {
asmgen.out(" lda $otherName")
asmgen.signExtendAYlsb(valueDt)
asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1")
}
asmgen.out("""
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
}
}
"/" -> {
if(dt==DataType.UWORD) {
@ -1939,16 +1976,31 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
"*" -> {
asmgen.out("""
lda $otherName
ldy $otherName+1
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
if(asmgen.options.veraFxMul) {
asmgen.out("""
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
lda $otherName
ldy $otherName+1
sta cx16.r1
sty cx16.r1+1
jsr verafx.muls
sta $name
sty $name+1""")
} else {
asmgen.out("""
lda $otherName
ldy $otherName+1
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
}
}
"/" -> {
if(dt==DataType.WORD) {
@ -2128,15 +2180,29 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
fun multiplyVarByWordInAY() {
asmgen.out("""
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1
""")
if(asmgen.options.veraFxMul) {
asmgen.out("""
sta cx16.r1
sty cx16.r1+1
lda $name
ldy $name+1
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls
sta $name
sty $name+1
""")
} else {
asmgen.out("""
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1
""")
}
}
fun divideVarByWordInAY() {

View File

@ -4,6 +4,7 @@
; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit
verafx {
%option no_symbol_prefixing
; unsigned multiplication just passes the values as signed to muls
; if you do this yourself in your call to muls, it will save a few instructions.

View File

@ -50,6 +50,7 @@ private fun compileMain(args: Array<String>): Boolean {
val includeSourcelines by cli.option(ArgType.Boolean, fullName = "sourcelines", description = "include original Prog8 source lines in generated asm code")
val splitWordArrays by cli.option(ArgType.Boolean, fullName = "splitarrays", description = "treat all word arrays as tagged with @split to make them lsb/msb split in memory")
val compilationTarget by cli.option(ArgType.String, fullName = "target", description = "target output of the compiler (one of '${C64Target.NAME}', '${C128Target.NAME}', '${Cx16Target.NAME}', '${AtariTarget.NAME}', '${PETTarget.NAME}', '${VMTarget.NAME}') (required)")
val veraFxMul by cli.option(ArgType.Boolean, fullName = "verafxmul", description = "use Vera Fx hardware assisted word multiplications (cx16 target only)")
val startVm by cli.option(ArgType.Boolean, fullName = "vm", description = "load and run a .p8ir IR source file in the VM")
val watchMode by cli.option(ArgType.Boolean, fullName = "watch", description = "continuous compilation mode (watch for file changes)")
val varsHighBank by cli.option(ArgType.Int, fullName = "varshigh", description = "put uninitialized variables in high memory area instead of at the end of the program. On the cx16 target the value specifies the HiRAM bank to use, on other systems this value is ignored.")
@ -95,6 +96,11 @@ private fun compileMain(args: Array<String>): Boolean {
return false
}
if(veraFxMul==true && compilationTarget!=Cx16Target.NAME) {
System.err.println("Vera Fx word multiplications are only available on the Commander X16.")
return false
}
if(startVm==true) {
return runVm(moduleFiles.first())
}
@ -122,6 +128,7 @@ private fun compileMain(args: Array<String>): Boolean {
varsHighBank,
compilationTarget!!,
splitWordArrays == true,
veraFxMul == true,
processedSymbols,
srcdirs,
outputPath
@ -190,6 +197,7 @@ private fun compileMain(args: Array<String>): Boolean {
varsHighBank,
compilationTarget!!,
splitWordArrays == true,
veraFxMul == true,
processedSymbols,
srcdirs,
outputPath

View File

@ -38,6 +38,7 @@ class CompilerArguments(val filepath: Path,
val varsHighBank: Int?,
val compilationTarget: String,
val splitWordArrays: Boolean,
val veraFxMul: Boolean,
val symbolDefs: Map<String, String>,
val sourceDirs: List<String> = emptyList(),
val outputDir: Path = Path(""),
@ -64,7 +65,7 @@ fun compileProgram(args: CompilerArguments): CompilationResult? {
try {
val totalTime = measureTimeMillis {
val (programresult, options, imported) = parseMainModule(args.filepath, args.errors, compTarget, args.sourceDirs)
val (programresult, options, imported) = parseMainModule(args.filepath, args.errors, compTarget, args.veraFxMul, args.sourceDirs)
compilationOptions = options
with(compilationOptions) {
@ -76,6 +77,7 @@ fun compileProgram(args: CompilerArguments): CompilationResult? {
experimentalCodegen = args.experimentalCodegen
varsHighBank = args.varsHighBank
splitWordArrays = args.splitWordArrays
veraFxMul = args.veraFxMul
outputDir = args.outputDir.normalize()
symbolDefs = args.symbolDefs
}
@ -232,6 +234,7 @@ private class BuiltinFunctionsFacade(functions: Map<String, FSignature>): IBuilt
fun parseMainModule(filepath: Path,
errors: IErrorReporter,
compTarget: ICompilationTarget,
veraFxMul: Boolean,
sourceDirs: List<String>): Triple<Program, CompilationOptions, List<Path>> {
val bf = BuiltinFunctionsFacade(BuiltinFunctions)
val program = Program(filepath.nameWithoutExtension, bf, compTarget, compTarget)
@ -250,9 +253,10 @@ fun parseMainModule(filepath: Path,
for(lib in compTarget.machine.importLibs(compilerOptions, compTarget.name))
importer.importImplicitLibraryModule(lib)
if(compilerOptions.compTarget.name!=VMTarget.NAME && !compilerOptions.experimentalCodegen) {
if(compilerOptions.compTarget.name!=VMTarget.NAME && !compilerOptions.experimentalCodegen)
importer.importImplicitLibraryModule("math")
}
if(veraFxMul)
importer.importImplicitLibraryModule("verafx")
importer.importImplicitLibraryModule("prog8_lib")
if (compilerOptions.launcher == CbmPrgLauncherType.BASIC && compilerOptions.output != OutputType.PRG)

View File

@ -35,6 +35,7 @@ private fun compileTheThing(filepath: Path, optimize: Boolean, target: ICompilat
varsHighBank = null,
compilationTarget = target.name,
splitWordArrays = false,
veraFxMul = false,
symbolDefs = emptyMap(),
outputDir = outputDir
)

View File

@ -52,6 +52,7 @@ class TestCompilerOptionSourcedirs: FunSpec({
varsHighBank = null,
compilationTarget = Cx16Target.NAME,
splitWordArrays = false,
veraFxMul = false,
symbolDefs = emptyMap(),
sourceDirs,
outputDir

View File

@ -87,7 +87,7 @@ main {
val filenameBase = "on_the_fly_test_" + sourceText.hashCode().toUInt().toString(16)
val filepath = outputDir.resolve("$filenameBase.p8")
filepath.toFile().writeText(sourceText)
val (program, options, importedfiles) = parseMainModule(filepath, errors, C64Target(), emptyList())
val (program, options, importedfiles) = parseMainModule(filepath, errors, C64Target(), false, emptyList())
program.toplevelModule.name shouldBe filenameBase
withClue("all imports other than the test source must have been internal resources library files") {

View File

@ -34,7 +34,8 @@ internal fun compileFile(
symbolDefs = emptyMap(),
outputDir = outputDir,
errors = errors ?: ErrorReporterForTests(),
splitWordArrays = false
splitWordArrays = false,
veraFxMul = false
)
return compileProgram(args)
}

View File

@ -179,6 +179,14 @@ One or more .p8 module files
This removes the need to add @split yourself but some programs may fail to compile with
this option as not all array operations are implemented yet on split arrays.
``-verafxmul``
Use Vera Fx hardware-assisted word multiplication routines (only on the Commander X16).
These are expected to be several times faster than the regular cpu routine,
but could interfere with regular Vera code. They also use the 4 bytes at the top of video ram,
just before the Vera PSG registers.
Use this option only if you know what you're doing; otherwise, calling ``verafx.muls()``
manually gives you more control over where this Vera routine is and isn't used.
``-vm``
Load and run a p8-virt or p8-ir listing in the internal VirtualMachine instead of compiling a prog8 program file.

View File

@ -73,6 +73,7 @@ Language features
- High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting.
- Programs can be run multiple times without reloading because of automatic variable (re)initializations.
- Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines.
- Support for low-level system features such as (optional) transparent use of the Vera Fx hardware-assisted word multiplication on the Commander X16.
- If you only use standard Kernal and core prog8 library routines, it is sometimes possible to compile the *exact same program* for different machines (just change the compilation target flag)

View File

@ -1,8 +1,7 @@
TODO
====
- why is petscii \n translated to $8d and not $0d? and \r IS translated to $0d?
- add a compiler switch to replace all calls to the math word mul routine on the X16 by the verafx call instead.
- add a compiler switch to replace all calls to the math word mul routine on the X16 by the verafx call instead. Search TODO("vera fx mul")
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
- [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
- IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified!

View File

@ -7,10 +7,33 @@
main {
sub start() {
txt.print("petscii \\r=")
txt.print_ub('\r')
txt.print(" and \\n=")
txt.print_ub('\n')
word w1 = -123
word w2 = 222
ubyte b2 = 222
byte sb2 = 111
txt.print_w(w1*w2)
txt.nl()
txt.print_w(w1*222)
txt.nl()
w1 = -123
w1 *= 222
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= w2
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= (w2-1)
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= b2
txt.print_w(w1)
txt.nl()
w1 = -123
w1 *= sb2
txt.print_w(w1)
txt.nl()
; txt.print_uw(math.mul16_last_upper())

View File

@ -42,6 +42,7 @@ class RequestParser : Take {
asmListfile = false,
experimentalCodegen = false,
splitWordArrays = false,
veraFxMul = false,
varsHighBank = null,
)
compileProgram(args)