Expressions can now be split over multiple lines. Fixed datatype of len().

2025-04-05 03:37:25 +00:00 · 2019-01-26 17:32:26 +01:00 · 2019-01-26 17:32:26 +01:00 · fa8a3c997a
commit fa8a3c997a
parent 1ff68b06da
11 changed files with 893 additions and 570 deletions
--- a/compiler/src/prog8/ast/AstChecker.kt
+++ b/compiler/src/prog8/ast/AstChecker.kt
@ -810,7 +810,7 @@ private class AstChecker(private val namespace: INameScope,
                for (arg in args.withIndex().zip(target.parameters)) {
                    val argDt = arg.first.value.resultingDatatype(namespace, heap)
                    if(argDt!=null && !argDt.assignableTo(arg.second.type))
-                        checkResult.add(ExpressionError("subroutine argument ${arg.first.index+1} has invalid type, expected ${arg.second.type}", position))
+                        checkResult.add(ExpressionError("subroutine argument ${arg.first.index+1} has invalid type $argDt, expected ${arg.second.type}", position))

                    if(target.isAsmSubroutine) {
                        if (target.asmParameterRegisters[arg.first.index].registerOrPair in setOf(RegisterOrPair.AX, RegisterOrPair.XY, RegisterOrPair.X)) {
@ -864,6 +864,9 @@ private class AstChecker(private val namespace: INameScope,
                if(index!=null && (index<0 || index>=arraysize))
                    checkResult.add(ExpressionError("array index out of bounds", arrayIndexedExpression.arrayspec.position))
            } else if(target.datatype in StringDatatypes) {
+                // check supported string type
+                if(target.datatype == DataType.STR_P || target.datatype==DataType.STR_PS)
+                    checkResult.add(ExpressionError("indexing pascal-strings is not supported, use regular str type instead", arrayIndexedExpression.arrayspec.position))
                // check string lengths
                val heapId = (target.value as LiteralValue).heapId!!
                val stringLen = heap.get(heapId).str!!.length
--- a/compiler/src/prog8/compiler/Compiler.kt
+++ b/compiler/src/prog8/compiler/Compiler.kt
@ -312,7 +312,7 @@ private class StatementTranslator(private val prog: IntermediateProgram,
            DataType.ARRAY_UW, DataType.ARRAY_W -> Opcode.WRITE_INDEXED_VAR_WORD
            DataType.ARRAY_F -> Opcode.WRITE_INDEXED_VAR_FLOAT
            DataType.STR, DataType.STR_S -> Opcode.WRITE_INDEXED_VAR_BYTE
-            DataType.STR_P, DataType.STR_PS -> throw CompilerException("cannot index on type $dt - use regular 0-terminated str type")
+            DataType.STR_P, DataType.STR_PS -> TODO("cannot index on type $dt - use regular str type")
            else -> throw CompilerException("invalid dt for indexed access $dt")
        }
    }
--- a/compiler/src/prog8/functions/BuiltinFunctions.kt
+++ b/compiler/src/prog8/functions/BuiltinFunctions.kt
@ -1,6 +1,7 @@
 package prog8.functions

 import prog8.ast.*
+import prog8.compiler.CompilerException
 import prog8.compiler.HeapValues
 import kotlin.math.PI
 import kotlin.math.cos
@ -298,15 +299,21 @@ private fun builtinLen(args: List<IExpression>, position: Position, namespace:IN
    return when(argument.type) {
        DataType.ARRAY_UB, DataType.ARRAY_B, DataType.ARRAY_UW, DataType.ARRAY_W -> {
            val arraySize = argument.arrayvalue?.size ?: heap.get(argument.heapId!!).arraysize
-            LiteralValue(DataType.UWORD, wordvalue=arraySize, position=args[0].position)
+            if(arraySize>255)
+                throw CompilerException("array length exceeds byte limit ${argument.position}")
+            LiteralValue(DataType.UBYTE, bytevalue=arraySize.toShort(), position=args[0].position)
        }
        DataType.ARRAY_F -> {
            val arraySize = argument.arrayvalue?.size ?: heap.get(argument.heapId!!).arraysize
-            LiteralValue(DataType.UWORD, wordvalue=arraySize, position=args[0].position)
+            if(arraySize>255)
+                throw CompilerException("array length exceeds byte limit ${argument.position}")
+            LiteralValue(DataType.UBYTE, bytevalue=arraySize.toShort(), position=args[0].position)
        }
        DataType.STR, DataType.STR_P, DataType.STR_S, DataType.STR_PS -> {
            val str = argument.strvalue(heap)
-            LiteralValue(DataType.UWORD, wordvalue=str.length, position=args[0].position)
+            if(str.length>255)
+                throw CompilerException("string length exceeds byte limit ${argument.position}")
+            LiteralValue(DataType.UBYTE, bytevalue=str.length.toShort(), position=args[0].position)
        }
        DataType.UBYTE, DataType.BYTE,
        DataType.UWORD, DataType.WORD,
--- a/docs/docs.iml
+++ b/docs/docs.iml
@ -2,7 +2,9 @@
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
-    <content url="file://$MODULE_DIR$" />
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/build" />
+    </content>
    <orderEntry type="jdk" jdkName="Python 3.7" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
--- a/docs/source/programming.rst
+++ b/docs/source/programming.rst
@ -111,13 +111,6 @@ Usually it is omitted, and the compiler will automatically choose the location (
 the previous block in memory).
 The address must be >= ``$0200`` (because ``$00``--``$ff`` is the ZP and ``$100``--``$200`` is the cpu stack).

-**The special "ZP" ZeroPage block**
-
-Blocks named "ZP" are treated a bit differently: they refer to the ZeroPage.
-The contents of every block with that name (this one may occur multiple times) are merged into one.
-Its start address is always set to ``$04``, because ``$00 - $01`` are used by the hardware
-and ``$02 - $03`` are reserved as general purpose scratch registers.
-

 .. _scopes:

@ -194,7 +187,51 @@ Values will usually be part of an expression or assignment statement::
    byte  counter  = 42   ; variable of size 8 bits, with initial value 42


-Array types are also supported. They can be made of bytes, words and floats::
+.. todo::
+    There must be a way to tell the compiler which variables you require to be in Zeropage:
+    ``zeropage`` modifier keyword on vardecl perhaps?
+
+
+Variables that represent CPU hardware registers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following variables are reserved
+and map directly (read/write) to a CPU hardware register: ``A``, ``X``, ``Y``.
+
+
+Integers
+^^^^^^^^
+
+Integers are 8 or 16 bit numbers and can be written in normal decimal notation,
+in hexadecimal and in binary notation.
+A single character in single quotes such as ``'a'`` is translated into a byte integer,
+which is the Petscii value for that character.
+
+Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 for unsigned word types.
+The signed integers are in the range -128..127 for bytes,
+and -32768..32767 for words.
+
+
+Floating point numbers
+^^^^^^^^^^^^^^^^^^^^^^
+
+Floats are stored in the 5-byte 'MFLPT' format that is used on CBM machines,
+and currently all floating point operations are specific to the Commodore-64.
+This is because routines in the C-64 BASIC and KERNAL ROMs are used for that.
+So floating point operations will only work if the C-64 BASIC ROM (and KERNAL ROM)
+are banked in.
+
+Also your code needs to import the ``c64flt`` library to enable floating point support
+in the compiler, and to gain access to the floating point routines.
+(this library contains the directive to enable floating points, you don't have
+to worry about this yourself)
+
+The largest 5-byte MFLPT float that can be stored is: **1.7014118345e+38**   (negative: **-1.7014118345e+38**)
+
+
+Arrays
+^^^^^^
+Array types are also supported. They can be made of bytes, words or floats::

    byte[4]  array = [1, 2, 3, 4]     ; initialize the array
    byte[99] array = 255              ; initialize array with all 255's [255, 255, 255, 255, ...]
@ -214,16 +251,22 @@ Note that the various keywords for the data type and variable type (``byte``, ``
 can't be used as *identifiers* elsewhere. You can't make a variable, block or subroutine with the name ``byte``
 for instance.

-.. todo::
-    There must be a way to tell the compiler which variables you require to be in Zeropage:
-    ``zeropage`` modifier keyword on vardecl perhaps?

+Strings
+^^^^^^^

-Variables that represent CPU hardware registers
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Strings are a sequence of characters enclosed in ``"`` quotes. The length is limited to 255 characters.
+They're stored and treated much the same as a byte array,
+but they have some special properties because they are considered to be *text*.
+Strings in your source code files will be encoded (translated from ASCII/UTF-8) into either CBM PETSCII or C-64 screencodes.
+PETSCII is the default choice. If you need screencodes (also called 'poke' codes) instead,
+you have to use the ``str_s`` variants of the string type identifier.

-The following variables are reserved
-and map directly (read/write) to a CPU hardware register: ``A``, ``X``, ``Y``.
+.. caution::
+    It's probably best that you don't change strings after they're created.
+    This is because if your program exits and is restarted (without loading it again),
+    it will then operate on the changed strings instead of the original ones.
+    The same is true for arrays by the way.


 Special types: const and memory-mapped
@ -250,53 +293,6 @@ address you specified, and setting the varible will directly modify that memory
    stands for, and the compiler also knows the data type.


-Integers
-^^^^^^^^
-
-Integers are 8 or 16 bit numbers and can be written in normal decimal notation,
-in hexadecimal and in binary notation.
-A single character in single quotes such as ``'a'`` is translated into a byte integer,
-which is the Petscii value for that character.
-
-Unsigned integers are in the range 0-255 for unsigned byte types, and 0-65535 for unsigned word types.
-The signed integers integers are in the range -128..127 for bytes,
-and -32768..32767 for words.
-
-
-Strings
-^^^^^^^
-
-Strings are a sequence of characters enclosed in ``"`` quotes. The length is limited to 255 characters.
-They're stored and treated much the same as a byte array,
-but they have some special properties because they are considered to be *text*.
-Strings in your source code files will be encoded (translated from ASCII/UTF-8) into either CBM PETSCII or C-64 screencodes.
-PETSCII is the default choice. If you need screencodes (also called 'poke' codes) instead,
-you have to use the ``str_s`` variants of the string type identifier.
-
-.. caution::
-    It's probably best that you don't change strings after they're created.
-    This is because if your program exits and is restarted (without loading it again),
-    it will then operate on the changed strings instead of the original ones.
-    The same is true for arrays by the way.
-
-
-Floating point numbers
-^^^^^^^^^^^^^^^^^^^^^^
-
-Floats are stored in the 5-byte 'MFLPT' format that is used on CBM machines,
-and currently all floating point operations are specific to the Commodore-64.
-This is because routines in the C-64 BASIC and KERNAL ROMs are used for that.
-So floating point operations will only work if the C-64 BASIC ROM (and KERNAL ROM)
-are banked in.
-
-Also your code needs to import the ``c64flt`` library to enable floating point support
-in the compiler, and to gain access to the floating point routines.
-(this library contains the directive to enable floating points, you don't have
-to worry about this yourself)
-
-The largest 5-byte MFLPT float that can be stored is: **1.7014118345e+38**   (negative: **-1.7014118345e+38**)
-
-
 Converting types into other types
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@ -443,6 +439,18 @@ a memory mapped location, you can do so by enclosing the address in ``@(...)``::
 Expressions
 -----------

+Expressions tell the program to *calculate* something. They consist of
+values, variables, operators such as ``+`` and ``-``, function calls, type casts, or other expressions.
+Here is an example that calculates the number of seconds in a certain time period::
+
+    num_hours * 3600 + num_minutes * 60 + num_seconds
+
+Long expressions can be split over multiple lines by inserting a line break before or after an operator::
+
+    num_hours * 3600
+     + num_minutes * 60
+     + num_seconds
+
 In most places where a number or other value is expected, you can use just the number, or a constant expression.
 If possible, the expression is parsed and evaluated by the compiler itself at compile time, and the (constant) resulting value is used in its place.
 Expressions that cannot be compile-time evaluated will result in code that calculates them at runtime.
--- a/docs/source/syntaxreference.rst
+++ b/docs/source/syntaxreference.rst
@ -458,6 +458,19 @@ The return type has to be specified if the subroutine returns a value.
    asmsub with a regular body to precisely control what registers are used to call the subroutine


+Expressions
+-----------
+
+Expressions calculate a value and can be used almost everywhere a value is expected.
+They consist of values, variables, operators, function calls, type casts, direct memory reads,
+and can be combined into other expressions.
+Long expressions can be split over multiple lines by inserting a line break before or after an operator::
+
+    num_hours * 3600
+     + num_minutes * 60
+     + num_seconds
+
+
 Loops
 -----

--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -33,3 +33,10 @@ Add more compiler optimizations to the existing ones.

 Also some library routines and code patterns could perhaps be optimized further

+
+Should use the zeropage for variables
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Variables should be allocated in the zeropage as long as it has space.
+- add some sort of ``zp`` modifier keyword on vardecls to force them into zeropage?
+
--- a/examples/mandelbrot.p8
+++ b/examples/mandelbrot.p8
@ -37,7 +37,9 @@
            }
        }

-        float duration = floor(((c64.TIME_LO as float) + 256.0*(c64.TIME_MID as float) + 65536.0*(c64.TIME_HI as float))/60.0)
+        float duration = floor(((c64.TIME_LO as float)
+                                + 256.0*(c64.TIME_MID as float)
+                                + 65536.0*(c64.TIME_HI as float))/60.0)
        c64scr.PLOT(0, 21)
        c64scr.print("finished in ")
        c64flt.print_f(duration)
--- a/parser/antlr/prog8.g4
+++ b/parser/antlr/prog8.g4
@ -15,6 +15,7 @@ LINECOMMENT : [\r\n][ \t]* COMMENT -> channel(HIDDEN);
 COMMENT :  ';' ~[\r\n]* -> channel(HIDDEN) ;
 WS :  [ \t] -> skip ;
 EOL :  [\r\n]+ ;
+// WS2 : '\\' EOL -> skip;
 NAME :  [a-zA-Z_][a-zA-Z0-9_]* ;
 DEC_INTEGER :  ('0'..'9') | (('1'..'9')('0'..'9')+);
 HEX_INTEGER :  '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ;
@ -127,19 +128,19 @@ postincrdecr :  assign_target  operator = ('++' | '--') ;
 expression :
 	functioncall
 	| <assoc=right> prefix = ('+'|'-'|'~') expression
-	| left = expression bop = '**' right = expression
-	| left = expression bop = ('*' | '/' | '%' ) right = expression
-	| left = expression bop = ('+' | '-' ) right = expression
-	| left = expression bop = ('<<' | '>>' ) right = expression
-	| left = expression bop = ('<' | '>' | '<=' | '>=') right = expression
-	| left = expression bop = ('==' | '!=') right = expression
-	| left = expression bop = '&' right = expression
-	| left = expression bop = '^' right = expression
-	| left = expression bop = '|' right = expression
+	| left = expression EOL? bop = '**' EOL? right = expression
+	| left = expression EOL? bop = ('*' | '/' | '%' ) EOL? right = expression
+	| left = expression EOL? bop = ('+' | '-' ) EOL? right = expression
+	| left = expression EOL? bop = ('<<' | '>>' ) EOL? right = expression
+	| left = expression EOL? bop = ('<' | '>' | '<=' | '>=') EOL? right = expression
+	| left = expression EOL? bop = ('==' | '!=') EOL? right = expression
+	| left = expression EOL? bop = '&' EOL? right = expression
+	| left = expression EOL? bop = '^' EOL? right = expression
+	| left = expression EOL? bop = '|' EOL? right = expression
 	| rangefrom = expression 'to' rangeto = expression ('step' rangestep = expression)?	// can't create separate rule due to mutual left-recursion
-	| left = expression bop = 'and' right = expression
-	| left = expression bop = 'or' right = expression
-	| left = expression bop = 'xor' right = expression
+	| left = expression EOL? bop = 'and' EOL? right = expression
+	| left = expression EOL? bop = 'or' EOL? right = expression
+	| left = expression EOL? bop = 'xor' EOL? right = expression
 	| prefix = 'not' expression
 	| literalvalue
 	| register
--- a/parser/src/prog8/parser/prog8Lexer.java
+++ b/parser/src/prog8/parser/prog8Lexer.java
@ -1,4 +1,4 @@
-// Generated from ./parser/antlr/prog8.g4 by ANTLR 4.7.2
+// Generated from /home/irmen/Projects/prog8/parser/antlr/prog8.g4 by ANTLR 4.7.2
 package prog8.parser;
 import org.antlr.v4.runtime.Lexer;
 import org.antlr.v4.runtime.CharStream;
--- a/parser/src/prog8/parser/prog8Parser.java
+++ b/parser/src/prog8/parser/prog8Parser.java