translate newline '\n' to char code 13 in various encodings such as ISO (used to be 10)

This means that when printed, such newlines will now properly go to the next line in these encodings too (ISO variants, KATAKANA).
This commit is contained in:
Irmen de Jong
2025-05-11 05:27:18 +02:00
parent 78b1076110
commit 37d4055036
16 changed files with 134 additions and 60 deletions
+4 -1
View File
@@ -6,7 +6,10 @@ import prog8.code.target.zp.C128Zeropage
import java.nio.file.Path
class C128Target: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by NormalMemSizer(Mflpt5.FLOAT_MEM_SIZE) {
class C128Target: ICompilationTarget,
IStringEncoding by Encoder(true),
IMemSizer by NormalMemSizer(Mflpt5.FLOAT_MEM_SIZE) {
override val name = NAME
override val defaultEncoding = Encoding.PETSCII
override val libraryPath = null
+4 -1
View File
@@ -7,7 +7,10 @@ import java.io.IOException
import java.nio.file.Path
class C64Target: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by NormalMemSizer(Mflpt5.Companion.FLOAT_MEM_SIZE) {
class C64Target: ICompilationTarget,
IStringEncoding by Encoder(true),
IMemSizer by NormalMemSizer(Mflpt5.Companion.FLOAT_MEM_SIZE) {
override val name = NAME
override val defaultEncoding = Encoding.PETSCII
override val libraryPath = null
@@ -37,7 +37,7 @@ class ConfigFileTarget(
val zpFullsafe: List<UIntRange>,
val zpKernalsafe: List<UIntRange>,
val zpBasicsafe: List<UIntRange>
): ICompilationTarget, IStringEncoding by Encoder, IMemSizer by NormalMemSizer(8) {
): ICompilationTarget, IStringEncoding by Encoder(true), IMemSizer by NormalMemSizer(8) {
companion object {
+4 -1
View File
@@ -6,7 +6,10 @@ import prog8.code.target.zp.CX16Zeropage
import java.nio.file.Path
class Cx16Target: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by NormalMemSizer(Mflpt5.Companion.FLOAT_MEM_SIZE) {
class Cx16Target: ICompilationTarget,
IStringEncoding by Encoder(true),
IMemSizer by NormalMemSizer(Mflpt5.Companion.FLOAT_MEM_SIZE) {
override val name = NAME
override val defaultEncoding = Encoding.PETSCII
override val libraryPath = null
+4 -1
View File
@@ -6,7 +6,10 @@ import prog8.code.target.zp.PETZeropage
import java.nio.file.Path
class PETTarget: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by NormalMemSizer(Mflpt5.Companion.FLOAT_MEM_SIZE) {
class PETTarget: ICompilationTarget,
IStringEncoding by Encoder(true),
IMemSizer by NormalMemSizer(Mflpt5.Companion.FLOAT_MEM_SIZE) {
override val name = NAME
override val defaultEncoding = Encoding.PETSCII
override val libraryPath = null
+4 -1
View File
@@ -7,7 +7,10 @@ import kotlin.io.path.isReadable
import kotlin.io.path.name
import kotlin.io.path.readText
class VMTarget: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by NormalMemSizer(FLOAT_MEM_SIZE) {
class VMTarget: ICompilationTarget,
IStringEncoding by Encoder(false),
IMemSizer by NormalMemSizer(FLOAT_MEM_SIZE) {
override val name = NAME
override val defaultEncoding = Encoding.ISO
override val libraryPath = null
@@ -197,6 +197,7 @@ object AtasciiEncoding {
fun encode(str: String): Result<List<UByte>, CharConversionException> {
val mapped = str.map { chr ->
when (chr) {
'\r' -> 0x9bu
'\u0000' -> 0u
in '\u8000'..'\u80ff' -> {
// special case: take the lower 8 bit hex value directly
@@ -285,6 +285,7 @@ object C64osEncoding {
val screencode = encodingC64os[chr]
return screencode?.toUByte() ?: when (chr) {
'\u0000' -> 0u
'\n' -> 13u
in '\u8000'..'\u80ff' -> {
// special case: take the lower 8 bit hex value directly
(chr.code - 0x8000).toUByte()
@@ -5,19 +5,19 @@ import prog8.code.core.Encoding
import prog8.code.core.IStringEncoding
import prog8.code.core.InternalCompilerException
object Encoder: IStringEncoding {
class Encoder(val newlineToCarriageReturn: Boolean): IStringEncoding {
override val defaultEncoding: Encoding = Encoding.ISO
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
val coded = when(encoding) {
Encoding.PETSCII -> PetsciiEncoding.encodePetscii(str, true)
Encoding.SCREENCODES -> PetsciiEncoding.encodeScreencode(str, true)
Encoding.ISO -> IsoEncoding.encode(str)
Encoding.ATASCII -> AtasciiEncoding.encode(str)
Encoding.ISO5 -> IsoCyrillicEncoding.encode(str)
Encoding.ISO16 -> IsoEasternEncoding.encode(str)
Encoding.ISO -> IsoEncoding.encode(str, newlineToCarriageReturn)
Encoding.ISO5 -> IsoCyrillicEncoding.encode(str, newlineToCarriageReturn)
Encoding.ISO16 -> IsoEasternEncoding.encode(str, newlineToCarriageReturn)
Encoding.CP437 -> Cp437Encoding.encode(str)
Encoding.KATAKANA -> KatakanaEncoding.encode(str)
Encoding.KATAKANA -> KatakanaEncoding.encode(str, newlineToCarriageReturn)
Encoding.ATASCII -> AtasciiEncoding.encode(str)
Encoding.C64OS -> C64osEncoding.encode(str)
else -> throw InternalCompilerException("unsupported encoding $encoding")
}
@@ -30,12 +30,12 @@ object Encoder: IStringEncoding {
val decoded = when(encoding) {
Encoding.PETSCII -> PetsciiEncoding.decodePetscii(bytes, true)
Encoding.SCREENCODES -> PetsciiEncoding.decodeScreencode(bytes, true)
Encoding.ISO -> IsoEncoding.decode(bytes)
Encoding.ATASCII -> AtasciiEncoding.decode(bytes)
Encoding.ISO5 -> IsoCyrillicEncoding.decode(bytes)
Encoding.ISO16 -> IsoEasternEncoding.decode(bytes)
Encoding.ISO -> IsoEncoding.decode(bytes, newlineToCarriageReturn)
Encoding.ISO5 -> IsoCyrillicEncoding.decode(bytes, newlineToCarriageReturn)
Encoding.ISO16 -> IsoEasternEncoding.decode(bytes, newlineToCarriageReturn)
Encoding.CP437 -> Cp437Encoding.decode(bytes)
Encoding.KATAKANA -> KatakanaEncoding.decode(bytes)
Encoding.KATAKANA -> KatakanaEncoding.decode(bytes, newlineToCarriageReturn)
Encoding.ATASCII -> AtasciiEncoding.decode(bytes)
Encoding.C64OS -> C64osEncoding.decode(bytes)
else -> throw InternalCompilerException("unsupported encoding $encoding")
}
@@ -6,14 +6,16 @@ import com.github.michaelbull.result.Result
import java.io.CharConversionException
import java.nio.charset.Charset
open class IsoEncodingBase(charsetName: String) {
val charset: Charset = Charset.forName(charsetName)
fun encode(str: String): Result<List<UByte>, CharConversionException> {
fun encode(str: String, newlineToCarriageReturn: Boolean): Result<List<UByte>, CharConversionException> {
return try {
val mapped = str.map { chr ->
when (chr) {
'\u0000' -> 0u
'\n' -> if(newlineToCarriageReturn) 13u else 10u
in '\u8000'..'\u80ff' -> {
// special case: take the lower 8 bit hex value directly
(chr.code - 0x8000).toUByte()
@@ -27,9 +29,14 @@ open class IsoEncodingBase(charsetName: String) {
}
}
fun decode(bytes: Iterable<UByte>): Result<String, CharConversionException> {
fun decode(bytes: Iterable<UByte>, newlineToCarriageReturn: Boolean): Result<String, CharConversionException> {
return try {
Ok(String(bytes.map { it.toByte() }.toByteArray(), charset))
Ok(String(bytes.map {
when(it) {
13u.toUByte() -> if(newlineToCarriageReturn) 10 else 13
else -> it.toByte()
}
}.toByteArray(), charset))
} catch (ce: CharConversionException) {
Err(ce)
}
@@ -64,10 +64,11 @@ object JapaneseCharacterConverter {
object KatakanaEncoding {
val charset: Charset = Charset.forName("JIS_X0201")
fun encode(str: String): Result<List<UByte>, CharConversionException> {
fun encode(str: String, newlineToCarriageReturn: Boolean): Result<List<UByte>, CharConversionException> {
return try {
val mapped = str.map { chr ->
when (chr) {
'\n' -> if(newlineToCarriageReturn) 13u else 10u
'\u0000' -> 0u
'\u00a0' -> 0xa0u // $a0 isn't technically a part of JIS X 0201 spec, and so we need to handle this ourselves
@@ -112,9 +113,14 @@ object KatakanaEncoding {
}
}
fun decode(bytes: Iterable<UByte>): Result<String, CharConversionException> {
fun decode(bytes: Iterable<UByte>, newlineToCarriageReturn: Boolean): Result<String, CharConversionException> {
return try {
Ok(String(bytes.map { it.toByte() }.toByteArray(), charset))
Ok(String(bytes.map {
when(it) {
13u.toUByte() -> if(newlineToCarriageReturn) 10 else 13
else -> it.toByte()
}
}.toByteArray(), charset))
} catch (ce: CharConversionException) {
Err(ce)
}
@@ -21,7 +21,7 @@ object PetsciiEncoding {
'\ufffe', // 0x07 -> UNDEFINED
'\uf118', // 0x08 -> DISABLE CHARACTER SET SWITCHING (CUS)
'\uf119', // 0x09 -> ENABLE CHARACTER SET SWITCHING (CUS)
'\ufffe', // 0x0A -> UNDEFINED
'\n', // 0x0A -> LINE FEED (RETURN)
'\ufffe', // 0x0B -> UNDEFINED
'\ufffe', // 0x0C -> UNDEFINED
'\n' , // 0x0D -> LINE FEED (RETURN)
@@ -1117,6 +1117,8 @@ object PetsciiEncoding {
val screencode = if(lowercase) encodingScreencodeLowercase[chr] else encodingScreencodeUppercase[chr]
return screencode?.toUByte() ?: when (chr) {
'\u0000' -> 0u
'\n' -> 141u
'\r' -> 141u
in '\u8000'..'\u80ff' -> {
// special case: take the lower 8 bit hex value directly
(chr.code - 0x8000).toUByte()