allow Unicode letters in identifiers: things like 'knäckebröd' and 'π' are now valid identifiers. Added floats.π constant.

This commit is contained in:
Irmen de Jong 2023-12-05 01:34:41 +01:00
parent 6ebd4e821f
commit 7d8b42d63e
10 changed files with 56 additions and 10 deletions

View File

@ -7,8 +7,9 @@ floats {
; ---- this block contains C-64 floating point related functions ---- ; ---- this block contains C-64 floating point related functions ----
%option no_symbol_prefixing %option no_symbol_prefixing
const float PI = 3.141592653589793 const float π = 3.141592653589793
const float TWOPI = 6.283185307179586 const float PI = π
const float TWOPI = 2*π
; ---- C64 basic and kernal ROM float constants and functions ---- ; ---- C64 basic and kernal ROM float constants and functions ----

View File

@ -8,8 +8,9 @@ floats {
%option no_symbol_prefixing %option no_symbol_prefixing
const float PI = 3.141592653589793 const float π = 3.141592653589793
const float TWOPI = 6.283185307179586 const float PI = π
const float TWOPI = 2*π
; ---- ROM float functions (same as on C128 except base page) ---- ; ---- ROM float functions (same as on C128 except base page) ----

View File

@ -4,8 +4,10 @@
floats { floats {
const float PI = 3.141592653589793 const float π = 3.141592653589793
const float TWOPI = 6.283185307179586 const float PI = π
const float TWOPI = 2*π
sub print_f(float value) { sub print_f(float value) {
; ---- prints the floating point value (without a newline). ; ---- prints the floating point value (without a newline).

View File

@ -5,6 +5,8 @@ import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe import io.kotest.matchers.shouldNotBe
import io.kotest.matchers.string.shouldContain import io.kotest.matchers.string.shouldContain
import prog8.code.target.C64Target import prog8.code.target.C64Target
import prog8.code.target.Cx16Target
import prog8.code.target.VMTarget
import prog8tests.helpers.ErrorReporterForTests import prog8tests.helpers.ErrorReporterForTests
import prog8tests.helpers.compileText import prog8tests.helpers.compileText
@ -145,4 +147,30 @@ class TestAstChecks: FunSpec({
errors.errors.size shouldBe 0 errors.errors.size shouldBe 0
errors.warnings.size shouldBe 0 errors.warnings.size shouldBe 0
} }
// Compiles one Prog8 program whose identifiers contain non-ASCII Unicode letters
// (a Cyrillic variable name, 'knäckebröd', and the library constant floats.π)
// for each of the three targets listed at the end, and expects a non-null result every time.
test("unicode in identifier names is working") {
// Prog8 source under test; it is passed to the compiler verbatim.
val text = """
%import floats
main {
ubyte приблизительно = 99
sub start() {
str knäckebröd = "crunchy"
prt(knäckebröd)
printf(2*floats.π)
}
sub prt(str message) {
приблизительно++
}
sub printf(float fl) {
приблизительно++
}
}"""
// Assembly output is requested (writeAssembly = true) for every target.
// NOTE(review): a null result presumably signals a failed compilation — confirm against compileText.
compileText(C64Target(), false, text, writeAssembly = true) shouldNotBe null
compileText(Cx16Target(), false, text, writeAssembly = true) shouldNotBe null
compileText(VMTarget(), false, text, writeAssembly = true) shouldNotBe null
}
}) })

View File

@ -71,6 +71,7 @@ Language features
- Variable data types include signed and unsigned bytes and words, arrays, strings. - Variable data types include signed and unsigned bytes and words, arrays, strings.
- Floating point math is supported on select compiler targets. - Floating point math is supported on select compiler targets.
- Strings can contain escaped characters but also many symbols directly if they have a PETSCII equivalent, such as "♠♥♣♦π▚●○╳". Characters like ^, _, \\, {, } and | are also accepted and converted to the closest PETSCII equivalents. - Strings can contain escaped characters but also many symbols directly if they have a PETSCII equivalent, such as "♠♥♣♦π▚●○╳". Characters like ^, _, \\, {, } and | are also accepted and converted to the closest PETSCII equivalents.
- Identifiers can contain Unicode Letters, so ``knäckebröd``, ``приблизительно``, ``見せしめ`` and ``π`` are all valid identifiers.
- High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting. - High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting.
- Programs can be run multiple times without reloading because of automatic variable (re)initializations. - Programs can be run multiple times without reloading because of automatic variable (re)initializations.
- Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines. - Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines.

View File

@ -292,6 +292,13 @@ floats
Provides definitions for the ROM/Kernal subroutines and utility routines dealing with floating Provides definitions for the ROM/Kernal subroutines and utility routines dealing with floating
point variables. This includes ``print_f``, the routine used to print floating point numbers. point variables. This includes ``print_f``, the routine used to print floating point numbers.
``π`` and ``PI``
Float constant for the number pi (3.141592653589793...).
``TWOPI``
Float constant for two times pi (6.283185307179586...).
``atan (x)`` ``atan (x)``
Arctangent. Arctangent.

View File

@ -18,6 +18,7 @@ Module
A file on disk with the ``.p8`` suffix. It can contain *directives* and *code blocks*. A file on disk with the ``.p8`` suffix. It can contain *directives* and *code blocks*.
Whitespace and indentation in the source code are arbitrary and can be mixed tabs or spaces. Whitespace and indentation in the source code are arbitrary and can be mixed tabs or spaces.
A module file can *import* other modules, including *library modules*. A module file can *import* other modules, including *library modules*.
A module file should be saved in UTF-8 encoding.
Comments Comments
Everything on the line after a semicolon ``;`` is a comment and is ignored by the compiler. Everything on the line after a semicolon ``;`` is a comment and is ignored by the compiler.

View File

@ -8,7 +8,7 @@ Module file
----------- -----------
This is a file with the ``.p8`` suffix, containing *directives* and *code blocks*, described below. This is a file with the ``.p8`` suffix, containing *directives* and *code blocks*, described below.
The file is a text file which can also contain: The file is a text file, saved in UTF-8 encoding, which can also contain:
Lines, whitespace, indentation Lines, whitespace, indentation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -247,7 +247,8 @@ Identifiers
----------- -----------
Naming things in Prog8 is done via valid *identifiers*. They start with a letter, Naming things in Prog8 is done via valid *identifiers*. They start with a letter,
and after that, a combination of letters, numbers, or underscores. Letters are from the 7-bit ASCII alphabet only. and after that, a combination of letters, numbers, or underscores.
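Note that any Unicode Letter symbol is accepted as a letter! Likewise, the numbers are not limited to the ASCII digits 0-9: Unicode decimal digits are accepted as well.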
Examples of valid identifiers:: Examples of valid identifiers::
a a
@ -256,6 +257,9 @@ Examples of valid identifiers::
COUNTER COUNTER
Better_Name_2 Better_Name_2
something_strange__ something_strange__
knäckebröd
приблизительно
π
Code blocks Code blocks

View File

@ -2,6 +2,8 @@
TODO TODO
==== ====
- add more projects (such as Paint) to the "Software written in Prog8" list
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
... ...
@ -78,7 +80,6 @@ What if we were to re-introduce Structs in prog8? Some thoughts:
Other language/syntax features to think about Other language/syntax features to think about
--------------------------------------------- ---------------------------------------------
- allow Unicode letters in identifiers à la Python. Don't forget to normalize all identifiers. See https://github.com/antlr/grammars-v4/blob/master/python/python3_12_0/PythonLexer.g4#L348C10-L348C21
- chained assignments `x=y=z=99` - chained assignments `x=y=z=99`
- declare multiple variables `ubyte x,y,z` (if init value present, all get that init value) - declare multiple variables `ubyte x,y,z` (if init value present, all get that init value)
- chained comparisons `10<x<20` , `x==y==z` (desugars to `10<x and x<20`, `x==y and y==z`) - chained comparisons `10<x<20` , `x==y==z` (desugars to `10<x and x<20`, `x==y and y==z`)

View File

@ -24,7 +24,7 @@ BLOCK_COMMENT : '/*' ( BLOCK_COMMENT | . )*? '*/' -> skip ;
WS : [ \t] -> skip ; WS : [ \t] -> skip ;
// WS2 : '\\' EOL -> skip; // WS2 : '\\' EOL -> skip;
VOID: 'void'; VOID: 'void';
NAME : [a-zA-Z][a-zA-Z0-9_]* ; NAME : [\p{Letter}][\p{Letter}\p{Digit}_]* ; // match unicode properties
DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+); DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+);
HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ; HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ;
BIN_INTEGER : '%' ('0' | '1')+ ; BIN_INTEGER : '%' ('0' | '1')+ ;