Fix Unicode identifier issues: normalize source text to NFC and allow combining marks in identifiers

This commit is contained in:
Irmen de Jong 2023-12-05 17:38:23 +01:00
parent 7d8b42d63e
commit f998888d6d
4 changed files with 12 additions and 10 deletions

View File

@ -3,6 +3,7 @@ package prog8.code.core
import java.io.File import java.io.File
import java.io.IOException import java.io.IOException
import java.nio.file.Path import java.nio.file.Path
import java.text.Normalizer
import kotlin.io.path.Path import kotlin.io.path.Path
import kotlin.io.path.readText import kotlin.io.path.readText
@ -94,7 +95,7 @@ sealed class SourceCode {
val normalized = path.normalize() val normalized = path.normalize()
origin = relative(normalized).toString() origin = relative(normalized).toString()
try { try {
text = normalized.readText() text = Normalizer.normalize(normalized.readText(), Normalizer.Form.NFC)
name = normalized.toFile().nameWithoutExtension name = normalized.toFile().nameWithoutExtension
} catch (nfx: java.nio.file.NoSuchFileException) { } catch (nfx: java.nio.file.NoSuchFileException) {
throw NoSuchFileException(normalized.toFile()).also { it.initCause(nfx) } throw NoSuchFileException(normalized.toFile()).also { it.initCause(nfx) }
@ -126,7 +127,7 @@ sealed class SourceCode {
) )
} }
val stream = object {}.javaClass.getResourceAsStream(normalized) val stream = object {}.javaClass.getResourceAsStream(normalized)
text = stream!!.reader().use { it.readText() } text = stream!!.reader().use { Normalizer.normalize(it.readText(), Normalizer.Form.NFC) }
name = Path(pathString).toFile().nameWithoutExtension name = Path(pathString).toFile().nameWithoutExtension
} }
} }

View File

@ -154,10 +154,11 @@ class TestAstChecks: FunSpec({
main { main {
ubyte приблизительно = 99 ubyte приблизительно = 99
ubyte อตวอกษรภาษาไท = 42
sub start() { sub start() {
str knäckebröd = "crunchy" str knäckebröd = "crunchy" ; with composed form
prt(knäckebröd) prt(knäckebröd) ; with decomposed form
printf(2*floats.π) printf(2*floats.π)
} }
@ -166,7 +167,7 @@ main {
} }
sub printf(float fl) { sub printf(float fl) {
приблизительно++ อตวอกษรภาษาไท++
} }
}""" }"""
compileText(C64Target(), false, text, writeAssembly = true) shouldNotBe null compileText(C64Target(), false, text, writeAssembly = true) shouldNotBe null

View File

@ -1,10 +1,10 @@
%import textio %import textio
%zeropage basicsafe
main { main {
sub start() { sub start() {
const ubyte HEIGHT=240 ubyte knäckebröt = 99
uword zz = 823423 cx16.r0L = knäckebröt
txt.print_uw(320*HEIGHT/8/8) ubyte นี่คือตัวอักษรภาษาไท = 3
cx16.r0L = นี่คือตัวอักษรภาษาไท
} }
} }

View File

@ -24,7 +24,7 @@ BLOCK_COMMENT : '/*' ( BLOCK_COMMENT | . )*? '*/' -> skip ;
WS : [ \t] -> skip ; WS : [ \t] -> skip ;
// WS2 : '\\' EOL -> skip; // WS2 : '\\' EOL -> skip;
VOID: 'void'; VOID: 'void';
NAME : [\p{Letter}][\p{Letter}\p{Digit}_]* ; // match unicode properties NAME : [\p{Letter}][\p{Letter}\p{Mark}\p{Digit}_]* ; // match unicode properties
DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+); DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+);
HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ; HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ;
BIN_INTEGER : '%' ('0' | '1')+ ; BIN_INTEGER : '%' ('0' | '1')+ ;