fix some unicode identifier issues

2024-07-13 12:29:05 +00:00 · 2023-12-05 17:38:23 +01:00 · 2023-12-05 17:38:23 +01:00 · f998888d6d
commit f998888d6d
parent 7d8b42d63e
4 changed files with 12 additions and 10 deletions
--- a/codeCore/src/prog8/code/core/SourceCode.kt
+++ b/codeCore/src/prog8/code/core/SourceCode.kt
@@ -3,6 +3,7 @@ package prog8.code.core
 import java.io.File
 import java.io.IOException
 import java.nio.file.Path
+import java.text.Normalizer
 import kotlin.io.path.Path
 import kotlin.io.path.readText

@@ -94,7 +95,7 @@ sealed class SourceCode {
            val normalized = path.normalize()
            origin = relative(normalized).toString()
            try {
-                text = normalized.readText()
+                text = Normalizer.normalize(normalized.readText(), Normalizer.Form.NFC)
                name = normalized.toFile().nameWithoutExtension
            } catch (nfx: java.nio.file.NoSuchFileException) {
                throw NoSuchFileException(normalized.toFile()).also { it.initCause(nfx) }
@@ -126,7 +127,7 @@ sealed class SourceCode {
                )
            }
            val stream = object {}.javaClass.getResourceAsStream(normalized)
-            text = stream!!.reader().use { it.readText() }
+            text = stream!!.reader().use { Normalizer.normalize(it.readText(), Normalizer.Form.NFC) }
            name = Path(pathString).toFile().nameWithoutExtension
        }
    }
--- a/compiler/test/TestAstChecks.kt
+++ b/compiler/test/TestAstChecks.kt
@@ -154,10 +154,11 @@ class TestAstChecks: FunSpec({

 main {
    ubyte приблизительно = 99
+    ubyte นี่คือตัวอักษรภาษาไท = 42
    
    sub start() {
-        str knäckebröd = "crunchy"
-        prt(knäckebröd)
+        str knäckebröd = "crunchy"  ; with composed form
+        prt(knäckebröd)             ; with decomposed form
        printf(2*floats.π)
    }

@@ -166,7 +167,7 @@ main {
    }

    sub printf(float fl) {
-        приблизительно++
+        นี่คือตัวอักษรภาษาไท++
    }
 }"""
        compileText(C64Target(), false, text, writeAssembly = true)  shouldNotBe null
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,10 +1,10 @@
 %import textio
-%zeropage basicsafe

 main {
    sub start() {
-        const ubyte HEIGHT=240
-        uword zz = 823423
-        txt.print_uw(320*HEIGHT/8/8)
+        ubyte knäckebröt = 99
+        cx16.r0L = knäckebröt
+        ubyte นี่คือตัวอักษรภาษาไท = 3
+        cx16.r0L = นี่คือตัวอักษรภาษาไท
    }
 }
--- a/parser/antlr/Prog8ANTLR.g4
+++ b/parser/antlr/Prog8ANTLR.g4
@@ -24,7 +24,7 @@ BLOCK_COMMENT : '/*' ( BLOCK_COMMENT | . )*? '*/'  -> skip ;
 WS :  [ \t] -> skip ;
 // WS2 : '\\' EOL -> skip;
 VOID: 'void';
-NAME :  [\p{Letter}][\p{Letter}\p{Digit}_]* ;           // match unicode properties
+NAME :  [\p{Letter}][\p{Letter}\p{Mark}\p{Digit}_]* ;           // match unicode properties
 DEC_INTEGER :  ('0'..'9') | (('1'..'9')('0'..'9')+);
 HEX_INTEGER :  '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ;
 BIN_INTEGER :  '%' ('0' | '1')+ ;