Fix some Unicode identifier issues

This commit is contained in:
Irmen de Jong 2023-12-05 17:38:23 +01:00
parent 7d8b42d63e
commit f998888d6d
4 changed files with 12 additions and 10 deletions

View File

@ -3,6 +3,7 @@ package prog8.code.core
import java.io.File
import java.io.IOException
import java.nio.file.Path
import java.text.Normalizer
import kotlin.io.path.Path
import kotlin.io.path.readText
@ -94,7 +95,7 @@ sealed class SourceCode {
val normalized = path.normalize()
origin = relative(normalized).toString()
try {
text = normalized.readText()
text = Normalizer.normalize(normalized.readText(), Normalizer.Form.NFC)
name = normalized.toFile().nameWithoutExtension
} catch (nfx: java.nio.file.NoSuchFileException) {
throw NoSuchFileException(normalized.toFile()).also { it.initCause(nfx) }
@ -126,7 +127,7 @@ sealed class SourceCode {
)
}
val stream = object {}.javaClass.getResourceAsStream(normalized)
text = stream!!.reader().use { it.readText() }
text = stream!!.reader().use { Normalizer.normalize(it.readText(), Normalizer.Form.NFC) }
name = Path(pathString).toFile().nameWithoutExtension
}
}

View File

@ -154,10 +154,11 @@ class TestAstChecks: FunSpec({
main {
ubyte приблизительно = 99
ubyte อตวอกษรภาษาไท = 42
sub start() {
str knäckebröd = "crunchy"
prt(knäckebröd)
str knäckebröd = "crunchy" ; with composed form
prt(knäckebröd) ; with decomposed form
printf(2*floats.π)
}
@ -166,7 +167,7 @@ main {
}
sub printf(float fl) {
приблизительно++
อตวอกษรภาษาไท++
}
}"""
compileText(C64Target(), false, text, writeAssembly = true) shouldNotBe null

View File

@ -1,10 +1,10 @@
%import textio
%zeropage basicsafe
main {
sub start() {
const ubyte HEIGHT=240
uword zz = 823423
txt.print_uw(320*HEIGHT/8/8)
ubyte knäckebröt = 99
cx16.r0L = knäckebröt
ubyte นี่คือตัวอักษรภาษาไท = 3
cx16.r0L = นี่คือตัวอักษรภาษาไท
}
}

View File

@ -24,7 +24,7 @@ BLOCK_COMMENT : '/*' ( BLOCK_COMMENT | . )*? '*/' -> skip ;
WS : [ \t] -> skip ;
// WS2 : '\\' EOL -> skip;
VOID: 'void';
NAME : [\p{Letter}][\p{Letter}\p{Digit}_]* ; // match unicode properties
NAME : [\p{Letter}][\p{Letter}\p{Mark}\p{Digit}_]* ; // match unicode properties
DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+);
HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ;
BIN_INTEGER : '%' ('0' | '1')+ ;