From 7d8b42d63e45e4a0dedf73c44eabd7928182599f Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Tue, 5 Dec 2023 01:34:41 +0100 Subject: [PATCH] =?UTF-8?q?allow=20Unicode=20letters=20in=20identifiers:?= =?UTF-8?q?=20things=20like=20'kn=C3=A4ckebr=C3=B6d'=20and=20'=CF=80'=20ar?= =?UTF-8?q?e=20now=20valid=20identifiers.=20=20Added=20floats.=CF=80=20=20?= =?UTF-8?q?constant.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- compiler/res/prog8lib/c64/floats.p8 | 5 +++-- compiler/res/prog8lib/cx16/floats.p8 | 5 +++-- compiler/res/prog8lib/virtual/floats.p8 | 6 ++++-- compiler/test/TestAstChecks.kt | 28 +++++++++++++++++++++++++ docs/source/index.rst | 1 + docs/source/libraries.rst | 7 +++++++ docs/source/programming.rst | 1 + docs/source/syntaxreference.rst | 8 +++++-- docs/source/todo.rst | 3 ++- parser/antlr/Prog8ANTLR.g4 | 2 +- 10 files changed, 56 insertions(+), 10 deletions(-) diff --git a/compiler/res/prog8lib/c64/floats.p8 b/compiler/res/prog8lib/c64/floats.p8 index d90b7852b..22239475c 100644 --- a/compiler/res/prog8lib/c64/floats.p8 +++ b/compiler/res/prog8lib/c64/floats.p8 @@ -7,8 +7,9 @@ floats { ; ---- this block contains C-64 floating point related functions ---- %option no_symbol_prefixing - const float PI = 3.141592653589793 - const float TWOPI = 6.283185307179586 + const float π = 3.141592653589793 + const float PI = π + const float TWOPI = 2*π ; ---- C64 basic and kernal ROM float constants and functions ---- diff --git a/compiler/res/prog8lib/cx16/floats.p8 b/compiler/res/prog8lib/cx16/floats.p8 index 747d51e6b..eebd0bdb1 100644 --- a/compiler/res/prog8lib/cx16/floats.p8 +++ b/compiler/res/prog8lib/cx16/floats.p8 @@ -8,8 +8,9 @@ floats { %option no_symbol_prefixing - const float PI = 3.141592653589793 - const float TWOPI = 6.283185307179586 + const float π = 3.141592653589793 + const float PI = π + const float TWOPI = 2*π ; ---- ROM float functions (same as on C128 except base page) ---- diff --git 
a/compiler/res/prog8lib/virtual/floats.p8 b/compiler/res/prog8lib/virtual/floats.p8 index 4c2e209d9..fee90bf52 100644 --- a/compiler/res/prog8lib/virtual/floats.p8 +++ b/compiler/res/prog8lib/virtual/floats.p8 @@ -4,8 +4,10 @@ floats { - const float PI = 3.141592653589793 - const float TWOPI = 6.283185307179586 + const float π = 3.141592653589793 + const float PI = π + const float TWOPI = 2*π + sub print_f(float value) { ; ---- prints the floating point value (without a newline). diff --git a/compiler/test/TestAstChecks.kt b/compiler/test/TestAstChecks.kt index 69e9c331d..4330cd2e5 100644 --- a/compiler/test/TestAstChecks.kt +++ b/compiler/test/TestAstChecks.kt @@ -5,6 +5,8 @@ import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldNotBe import io.kotest.matchers.string.shouldContain import prog8.code.target.C64Target +import prog8.code.target.Cx16Target +import prog8.code.target.VMTarget import prog8tests.helpers.ErrorReporterForTests import prog8tests.helpers.compileText @@ -145,4 +147,30 @@ class TestAstChecks: FunSpec({ errors.errors.size shouldBe 0 errors.warnings.size shouldBe 0 } + + test("unicode in identifier names is working") { + val text = """ +%import floats + +main { + ubyte приблизительно = 99 + + sub start() { + str knäckebröd = "crunchy" + prt(knäckebröd) + printf(2*floats.π) + } + + sub prt(str message) { + приблизительно++ + } + + sub printf(float fl) { + приблизительно++ + } +}""" + compileText(C64Target(), false, text, writeAssembly = true) shouldNotBe null + compileText(Cx16Target(), false, text, writeAssembly = true) shouldNotBe null + compileText(VMTarget(), false, text, writeAssembly = true) shouldNotBe null + } }) diff --git a/docs/source/index.rst b/docs/source/index.rst index 1fdbbd918..ed39682e0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -71,6 +71,7 @@ Language features - Variable data types include signed and unsigned bytes and words, arrays, strings. 
- Floating point math is supported on select compiler targets. - Strings can contain escaped characters but also many symbols directly if they have a PETSCII equivalent, such as "♠♥♣♦π▚●○╳". Characters like ^, _, \\, {, } and | are also accepted and converted to the closest PETSCII equivalents. +- Identifiers can contain Unicode Letters, so ``knäckebröd``, ``приблизительно``, ``見せしめ`` and ``π`` are all valid identifiers. - High-level code optimizations, such as const-folding (zero-allocation constants that are optimized away in expressions), expression and statement simplifications/rewriting. - Programs can be run multiple times without reloading because of automatic variable (re)initializations. - Supports the sixteen 'virtual' 16-bit registers R0 .. R15 as defined on the Commander X16, also on the other machines. diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index b4297f2ff..5fec5672d 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -292,6 +292,13 @@ floats Provides definitions for the ROM/Kernal subroutines and utility routines dealing with floating point variables. This includes ``print_f``, the routine used to print floating point numbers. + +``π`` and ``PI`` + float const for the number Pi, 3.141592653589793... + +``TWOPI`` + float const for the number 2 times Pi + ``atan (x)`` Arctangent. diff --git a/docs/source/programming.rst b/docs/source/programming.rst index 4adcef92b..26a0988d2 100644 --- a/docs/source/programming.rst +++ b/docs/source/programming.rst @@ -18,6 +18,7 @@ Module A file on disk with the ``.p8`` suffix. It can contain *directives* and *code blocks*. Whitespace and indentation in the source code are arbitrary and can be mixed tabs or spaces. A module file can *import* other modules, including *library modules*. + It should be saved in UTF-8 encoding. Comments Everything on the line after a semicolon ``;`` is a comment and is ignored by the compiler. 
diff --git a/docs/source/syntaxreference.rst b/docs/source/syntaxreference.rst index 6e4e30131..f0d72ed32 100644 --- a/docs/source/syntaxreference.rst +++ b/docs/source/syntaxreference.rst @@ -8,7 +8,7 @@ Module file ----------- This is a file with the ``.p8`` suffix, containing *directives* and *code blocks*, described below. -The file is a text file which can also contain: +The file is a text file, saved in UTF-8 encoding, which can also contain: Lines, whitespace, indentation ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -247,7 +247,8 @@ Identifiers ----------- Naming things in Prog8 is done via valid *identifiers*. They start with a letter, -and after that, a combination of letters, numbers, or underscores. Letters are from the 7-bit ASCII alphabet only. +and after that, a combination of letters, numbers, or underscores. +Note that any Unicode letter counts as a letter here, and any Unicode digit counts as a number. Examples of valid identifiers:: a @@ -256,6 +257,9 @@ Examples of valid identifiers:: COUNTER Better_Name_2 something_strange__ + knäckebröd + приблизительно + π Code blocks diff --git a/docs/source/todo.rst b/docs/source/todo.rst index e83572ae0..adb93736d 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -2,6 +2,8 @@ TODO ==== +- add more projects such as Paint to the Software written in Prog8 list + - [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 .... ... @@ -78,7 +80,6 @@ What if we were to re-introduce Structs in prog8? Some thoughts: Other language/syntax features to think about --------------------------------------------- -- allow Unicode letters in identifiers à la Python. Don't forget to normalize all identifiers.
See https://github.com/antlr/grammars-v4/blob/master/python/python3_12_0/PythonLexer.g4#L348C10-L348C21 - chained assignments `x=y=z=99` - declare multiple variables `ubyte x,y,z` (if init value present, all get that init value) - chained comparisons `10 skip ; WS : [ \t] -> skip ; // WS2 : '\\' EOL -> skip; VOID: 'void'; -NAME : [a-zA-Z][a-zA-Z0-9_]* ; +NAME : [\p{Letter}][\p{Letter}\p{Digit}_]* ; // identifier: one Unicode letter, then any mix of Unicode letters, Unicode digits and '_' (a leading digit or '_' is not allowed) DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+); HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ; BIN_INTEGER : '%' ('0' | '1')+ ;