improved docs about subroutine scoping, fix possible optimizer crash for inlined sub

This commit is contained in:
Irmen de Jong 2023-12-04 22:18:37 +01:00
parent d1806bfdc3
commit 6ebd4e821f
6 changed files with 173 additions and 117 deletions

View File

@ -34,85 +34,95 @@ class Inliner(private val program: Program, private val options: CompilationOpti
val containsSubsOrVariables = subroutine.statements.any { it is VarDecl || it is Subroutine}
if(!containsSubsOrVariables) {
if(subroutine.statements.size==1 || (subroutine.statements.size==2 && isEmptyReturn(subroutine.statements[1]))) {
// subroutine is possible candidate to be inlined
subroutine.inline =
when(val stmt=subroutine.statements[0]) {
is Return -> {
if(stmt.value is NumericLiteral)
true
else if(stmt.value==null)
true
else if (stmt.value is IdentifierReference) {
makeFullyScoped(stmt.value as IdentifierReference)
true
} else if(stmt.value!! is IFunctionCall && (stmt.value as IFunctionCall).args.size<=1 && (stmt.value as IFunctionCall).args.all { it is NumericLiteral || it is IdentifierReference }) {
when (stmt.value) {
is BuiltinFunctionCall -> {
makeFullyScoped(stmt.value as BuiltinFunctionCall)
true
if(subroutine !== program.entrypoint) {
// subroutine is possible candidate to be inlined
subroutine.inline =
when (val stmt = subroutine.statements[0]) {
is Return -> {
if (stmt.value is NumericLiteral)
true
else if (stmt.value == null)
true
else if (stmt.value is IdentifierReference) {
makeFullyScoped(stmt.value as IdentifierReference)
true
} else if (stmt.value!! is IFunctionCall && (stmt.value as IFunctionCall).args.size <= 1 && (stmt.value as IFunctionCall).args.all { it is NumericLiteral || it is IdentifierReference }) {
when (stmt.value) {
is BuiltinFunctionCall -> {
makeFullyScoped(stmt.value as BuiltinFunctionCall)
true
}
is FunctionCallExpression -> {
makeFullyScoped(stmt.value as FunctionCallExpression)
true
}
else -> false
}
is FunctionCallExpression -> {
makeFullyScoped(stmt.value as FunctionCallExpression)
true
}
else -> false
}
} else
false
}
is Assignment -> {
if(stmt.value.isSimple) {
val targetInline =
if(stmt.target.identifier!=null) {
makeFullyScoped(stmt.target.identifier!!)
true
} else if(stmt.target.memoryAddress?.addressExpression is NumericLiteral || stmt.target.memoryAddress?.addressExpression is IdentifierReference) {
if(stmt.target.memoryAddress?.addressExpression is IdentifierReference)
makeFullyScoped(stmt.target.memoryAddress?.addressExpression as IdentifierReference)
true
} else
false
val valueInline =
if(stmt.value is IdentifierReference) {
makeFullyScoped(stmt.value as IdentifierReference)
true
} else if((stmt.value as? DirectMemoryRead)?.addressExpression is NumericLiteral || (stmt.value as? DirectMemoryRead)?.addressExpression is IdentifierReference) {
if((stmt.value as? DirectMemoryRead)?.addressExpression is IdentifierReference)
makeFullyScoped((stmt.value as? DirectMemoryRead)?.addressExpression as IdentifierReference)
true
} else
false
targetInline || valueInline
} else
false
}
is BuiltinFunctionCallStatement -> {
val inline = stmt.args.size<=1 && stmt.args.all { it is NumericLiteral || it is IdentifierReference }
if(inline)
makeFullyScoped(stmt)
inline
}
is FunctionCallStatement -> {
val inline = stmt.args.size<=1 && stmt.args.all { it is NumericLiteral || it is IdentifierReference }
if(inline)
makeFullyScoped(stmt)
inline
}
is PostIncrDecr -> {
if(stmt.target.identifier!=null) {
makeFullyScoped(stmt.target.identifier!!)
true
} else
false
}
else if(stmt.target.memoryAddress?.addressExpression is NumericLiteral || stmt.target.memoryAddress?.addressExpression is IdentifierReference) {
if(stmt.target.memoryAddress?.addressExpression is IdentifierReference)
makeFullyScoped(stmt.target.memoryAddress?.addressExpression as IdentifierReference)
true
} else
false
is Assignment -> {
if (stmt.value.isSimple) {
val targetInline =
if (stmt.target.identifier != null) {
makeFullyScoped(stmt.target.identifier!!)
true
} else if (stmt.target.memoryAddress?.addressExpression is NumericLiteral || stmt.target.memoryAddress?.addressExpression is IdentifierReference) {
if (stmt.target.memoryAddress?.addressExpression is IdentifierReference)
makeFullyScoped(stmt.target.memoryAddress?.addressExpression as IdentifierReference)
true
} else
false
val valueInline =
if (stmt.value is IdentifierReference) {
makeFullyScoped(stmt.value as IdentifierReference)
true
} else if ((stmt.value as? DirectMemoryRead)?.addressExpression is NumericLiteral || (stmt.value as? DirectMemoryRead)?.addressExpression is IdentifierReference) {
if ((stmt.value as? DirectMemoryRead)?.addressExpression is IdentifierReference)
makeFullyScoped((stmt.value as? DirectMemoryRead)?.addressExpression as IdentifierReference)
true
} else
false
targetInline || valueInline
} else
false
}
is BuiltinFunctionCallStatement -> {
val inline =
stmt.args.size <= 1 && stmt.args.all { it is NumericLiteral || it is IdentifierReference }
if (inline)
makeFullyScoped(stmt)
inline
}
is FunctionCallStatement -> {
val inline =
stmt.args.size <= 1 && stmt.args.all { it is NumericLiteral || it is IdentifierReference }
if (inline)
makeFullyScoped(stmt)
inline
}
is PostIncrDecr -> {
if (stmt.target.identifier != null) {
makeFullyScoped(stmt.target.identifier!!)
true
} else if (stmt.target.memoryAddress?.addressExpression is NumericLiteral || stmt.target.memoryAddress?.addressExpression is IdentifierReference) {
if (stmt.target.memoryAddress?.addressExpression is IdentifierReference)
makeFullyScoped(stmt.target.memoryAddress?.addressExpression as IdentifierReference)
true
} else
false
}
is Jump -> true
else -> false
}
is Jump -> true
else -> false
}
}
}
if(subroutine.inline && subroutine.statements.size>1) {
@ -134,16 +144,20 @@ class Inliner(private val program: Program, private val options: CompilationOpti
private fun makeFullyScoped(call: BuiltinFunctionCallStatement) {
val scopedArgs = makeScopedArgs(call.args)
val scopedCall = BuiltinFunctionCallStatement(call.target.copy(), scopedArgs.toMutableList(), call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
if(scopedArgs.any()) {
val scopedCall = BuiltinFunctionCallStatement(call.target.copy(), scopedArgs.toMutableList(), call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
}
}
private fun makeFullyScoped(call: FunctionCallStatement) {
call.target.targetSubroutine(program)?.let { sub ->
val scopedName = IdentifierReference(sub.scopedName, call.target.position)
val scopedArgs = makeScopedArgs(call.args)
val scopedCall = FunctionCallStatement(scopedName, scopedArgs.toMutableList(), call.void, call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
if(scopedArgs.any()) {
val scopedCall = FunctionCallStatement(scopedName, scopedArgs.toMutableList(), call.void, call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
}
}
}
@ -151,8 +165,10 @@ class Inliner(private val program: Program, private val options: CompilationOpti
call.target.targetSubroutine(program)?.let { sub ->
val scopedName = IdentifierReference(sub.scopedName, call.target.position)
val scopedArgs = makeScopedArgs(call.args)
val scopedCall = BuiltinFunctionCall(scopedName, scopedArgs.toMutableList(), call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
if(scopedArgs.any()) {
val scopedCall = BuiltinFunctionCall(scopedName, scopedArgs.toMutableList(), call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
}
}
}
@ -160,8 +176,10 @@ class Inliner(private val program: Program, private val options: CompilationOpti
call.target.targetSubroutine(program)?.let { sub ->
val scopedName = IdentifierReference(sub.scopedName, call.target.position)
val scopedArgs = makeScopedArgs(call.args)
val scopedCall = FunctionCallExpression(scopedName, scopedArgs.toMutableList(), call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
if(scopedArgs.any()) {
val scopedCall = FunctionCallExpression(scopedName, scopedArgs.toMutableList(), call.position)
modifications += IAstModification.ReplaceNode(call, scopedCall, call.parent)
}
}
}
@ -170,7 +188,8 @@ class Inliner(private val program: Program, private val options: CompilationOpti
when (it) {
is NumericLiteral -> it.copy()
is IdentifierReference -> {
val scoped = (it.targetStatement(program)!! as INamedStatement).scopedName
val target = it.targetStatement(program) ?: return emptyList()
val scoped = (target as INamedStatement).scopedName
IdentifierReference(scoped, it.position)
}
else -> throw InternalCompilerException("expected only number or identifier arg, otherwise too complex")

View File

@ -853,4 +853,28 @@ main {
}"""
compileText(Cx16Target(), true, src, writeAssembly = true) shouldNotBe null
}
test("no crash for making var in removed/inlined subroutine fully scoped") {
val src="""
main {
sub start() {
test()
}
sub test() {
sub nested() {
ubyte counter
counter++
}
test2(main.test.nested.counter) ; shouldn't crash but just give nice error
}
sub test2(ubyte value) {
value++
}
}"""
val errors = ErrorReporterForTests()
compileText(Cx16Target(), true, src, writeAssembly = false, errors = errors) shouldBe null
errors.errors.single() shouldContain "undefined symbol"
}
})

View File

@ -594,7 +594,7 @@ the emulators already support it).
by calls to verafx.muls/mult, but be careful with it because it may interfere with other Vera operations or IRQs.
Note: the lower 16 bits of the 32-bit result are returned as the subroutine's normal return value,
but the upper 16 bits are returned in `cx16.r0` so you can still access those separately.
but the upper 16 bits are returned in cx16.r0 so you can still access those separately.
``clear``
Very quickly clear a piece of vram to a given byte value (it writes 4 bytes at a time).

View File

@ -61,11 +61,9 @@ Code
Subroutine
Defines a piece of code that can be called by its name from different locations in your code.
It accepts parameters and can return a value (optional).
It can define its own variables, and it is even possible to define subroutines nested inside other subroutines.
Their contents is scoped accordingly.
Nested subroutines can access the variables from outer scopes.
This removes the need and overhead to pass everything via parameters.
Subroutines do not have to be declared before they can be called.
It can define its own variables, and it is also possible to define subroutines within other subroutines.
Nested subroutines can access the variables from outer scopes easily, which removes the need and overhead to pass everything via parameters all the time.
Subroutines do not have to be declared in the source code before they can be called.
Label
This is a named position in your code where you can jump to from another place.
@ -79,14 +77,15 @@ Scope
Anything *inside* the scope can refer to symbols in the same scope without using a prefix.
There are three scope levels in Prog8:
- global (no prefix)
- code block
- subroutine
- global (no prefix), everything in a module file goes in here;
- block;
- subroutine, can be nested in another subroutine.
While Modules are separate files, they are *not* separate scopes!
Even though modules are separate files, they are *not* separate scopes!
Everything defined in a module is merged into the global scope.
This is different from most other languages that have modules.
The global scope can only contain blocks and some directives, while the others can contain variables and subroutines too.
Some more details about how to deal with scopes and names are discussed below.
.. _blocks:
@ -110,16 +109,6 @@ The name of a block must be unique in your entire program.
Be careful when importing other modules; blocks in your own code cannot have
the same name as a block defined in an imported module or library.
The address can be used to place a block at a specific location in memory.
Usually it is omitted, and the compiler will automatically choose the location (usually immediately after
the previous block in memory).
It must be >= ``$0200`` (because ``$00``--``$ff`` is the ZP and ``$100``--``$1ff`` is the cpu stack).
.. _scopes:
**Scoping rules**
.. sidebar::
Use qualified names ("dotted names") to reference symbols defined elsewhere
@ -127,29 +116,41 @@ It must be >= ``$0200`` (because ``$00``--``$ff`` is the ZP and ``$100``--``$1ff
So, accessing a variable ``counter`` defined in subroutine ``worker`` in block ``main``,
can be done from anywhere by using ``main.worker.counter``.
The address can be used to place a block at a specific location in memory.
Usually it is omitted, and the compiler will automatically choose the location (usually immediately after
the previous block in memory).
It must be >= ``$0200`` (because ``$00``--``$ff`` is the ZP and ``$100``--``$1ff`` is the cpu stack).
*Symbols* are names defined in a certain *scope*. Inside the same scope, you can refer
to them by their 'short' name directly. If the symbol is not found in the same scope,
the enclosing scope is searched for it, and so on, up to the top level block, until the symbol is found.
If the symbol is not found, the compiler will issue an error message.
**Subroutines** create a new scope. All variables inside a subroutine are hoisted up to the
scope of the subroutine they are declared in. Note that you can define **nested subroutines** in Prog8,
and such a nested subroutine has its own scope! This also means that you have to use a fully qualified name
to access a variable that is defined inside a nested subroutine from outside of it::
main {
sub start() {
sub nested() {
ubyte counter
...
}
...
txt.print_ub(counter) ; Error: undefined symbol
txt.print_ub(main.start.nested.counter) ; OK
}
}
Scopes are created using either of these two statements:
- blocks (top-level named scope)
- subroutines (nested named scope)
.. important::
Unlike most other programming languages, a new scope is *not* created inside
Emphasizing this once more: unlike most other programming languages, a new scope is *not* created inside
for, while, repeat, and do-until statements, the if statement, and the branching conditionals.
These all share the same scope from the subroutine they're defined in.
You can define variables in these blocks, but these will be treated as if they
were defined in the subroutine instead.
This can seem a bit restrictive because you have to think harder about what variables you
want to use inside the subroutine, to avoid clashes.
But this decision was made for a good reason: memory in prog8's
target systems is usually very limited and it would be a waste to allocate a lot of variables.
The prog8 compiler is not yet advanced enough to be able to share or overlap
variables intelligently. So for now that is something you have to think about yourself.
Program Start and Entry Point
@ -181,7 +182,7 @@ Variables and values
--------------------
Variables are named values that can change during the execution of the program.
They can be defined inside any scope (blocks, subroutines etc.) See :ref:`Scopes <scopes>`.
They can be defined inside any scope (blocks, subroutines etc.) See :ref:`blocks`.
When declaring a numeric variable it is possible to specify the initial value, if you don't want it to be zero.
For other data types it is required to specify the initial value it should get.
Values will usually be part of an expression or assignment statement::

View File

@ -247,7 +247,8 @@ Identifiers
-----------
Naming things in Prog8 is done via valid *identifiers*. They start with a letter,
and after that, a combination of letters, numbers, or underscores. Examples of valid identifiers::
and after that, a combination of letters, numbers, or underscores. Letters are from the 7-bit ASCII alphabet only.
Examples of valid identifiers::
a
A

View File

@ -73,3 +73,14 @@ What if we were to re-introduce Structs in prog8? Some thoughts:
- need to introduce typed pointer datatype in prog8
- str is then syntactic sugar for pointer to character/byte?
- arrays are then syntactic sugar for pointer to byte/word/float?
Other language/syntax features to think about
---------------------------------------------
- allow Unicode letters in identifiers à la Python. Don't forget to normalize all identifiers. See https://github.com/antlr/grammars-v4/blob/master/python/python3_12_0/PythonLexer.g4#L348C10-L348C21
- chained assignments `x=y=z=99`
- declare multiple variables `ubyte x,y,z` (if init value present, all get that init value)
- chained comparisons `10<x<20` , `x==y==z` (desugars to `10<x and x<20`, `x==y and y==z`)
- postincrdecr as expression, preincrdecr expression (`y = x++`, `y = ++x`) .... is this even possible, expression with side effects like this?
- negative array index to refer to an element from the end of the array. Python `[-1]` or Raku syntax `[\*-1]` , `[\*/2]` .... \*=size of the array