From e449b67a4a88ecf0affa3854de68c3f3d89bd25c Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Wed, 4 Sep 2019 01:14:14 +0200 Subject: [PATCH] Added encconv module. --- CHANGELOG.md | 2 + docs/README.md | 4 + docs/doc_index.md | 2 + docs/lang/preprocessor.md | 2 + docs/stdlib/encconv.md | 74 +++++++++++ include/encconv.mfk | 119 ++++++++++++++++++ src/main/scala/millfork/Platform.scala | 9 +- .../parser/AbstractSourceLoadingQueue.scala | 17 ++- 8 files changed, 225 insertions(+), 4 deletions(-) create mode 100644 docs/stdlib/encconv.md create mode 100644 include/encconv.mfk diff --git a/CHANGELOG.md b/CHANGELOG.md index f280b3a9..caddefbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ * Allowed passing non-decimal numbers to the `-D` option. +* Added the `encconv` module. + * Added `nullchar` constant as the null terminator for strings and `NULLCHAR` feature to define its value. * Added `vectrex` text encoding. diff --git a/docs/README.md b/docs/README.md index a1f854d8..87c99b42 100644 --- a/docs/README.md +++ b/docs/README.md @@ -22,6 +22,8 @@ * [Literals](lang/literals.md) +* [Predefined constants](lang/predefined_constants.md) + * [List of text encodings and escape sequences](lang/text.md) * [Operators reference](lang/operators.md) @@ -44,6 +46,8 @@ * [Modules for reading input devices](stdlib/input.md) +* [`encconv` module](stdlib/encconv.md) + * [Other cross-platform modules (`keyboard`, `err`, `random`)](stdlib/other.md) * [Definitions available on only some platforms](stdlib/frequent.md) diff --git a/docs/doc_index.md b/docs/doc_index.md index 811da15f..87c99b42 100644 --- a/docs/doc_index.md +++ b/docs/doc_index.md @@ -46,6 +46,8 @@ * [Modules for reading input devices](stdlib/input.md) +* [`encconv` module](stdlib/encconv.md) + * [Other cross-platform modules (`keyboard`, `err`, `random`)](stdlib/other.md) * [Definitions available on only some platforms](stdlib/frequent.md) diff --git a/docs/lang/preprocessor.md b/docs/lang/preprocessor.md 
index a695b606..dcabc769 100644 --- a/docs/lang/preprocessor.md +++ b/docs/lang/preprocessor.md @@ -54,6 +54,8 @@ The following features are defined based on the chosen CPU and compilation optio `CPUFEATURE_8080`, `CPUFEATURE_8085`, `CPUFEATURE_GAMEBOY`, `CPUFEATURE_Z80`, `CPUFEATURE_6502_ILLEGALS`, `CPUFEATURE_8085_ILLEGALS`, `CPUFEATURE_Z80_ILLEGALS` – 1 if given instruction subset is enabled, 0 otherwise +* `ENCODING_SAME` – 1 if the encodings `default` and `scr` are the same, 0 otherwise. + * `INIT_RW_MEMORY` – 1 if the option `ram_init_segment` is defined, 0 otherwise. See [the ROM vs RAM guide](../api/rom-vs-ram.md) for more information. diff --git a/docs/stdlib/encconv.md b/docs/stdlib/encconv.md new file mode 100644 index 00000000..ad5bd1ae --- /dev/null +++ b/docs/stdlib/encconv.md @@ -0,0 +1,74 @@ +[< back to index](../doc_index.md) + +## encconv + +The `encconv` module provides functions for character set conversions. + +All the functions are defined only for the characters that are valid in both the input and the output encoding. +Unsupported characters may give arbitrary results. +The unsupported characters are not guaranteed to roundtrip. + +Some encodings (e.g. PETSCII) allow for multiple encodings of the same character. +For the input, all encodings are equivalent. +For the output, the canonical encoding is preferred. + +Characters that are present in the input encoding, +but are encoded as multiple bytes in the output encoding, are not supported. + +#### byte to_screencode(byte) + +Converts a byte from the default encoding to the screen encoding. + +If both encodings contain the character `¤`, then `to_screencode('¤') == '¤'scr`. 
+ +Available only if one of the following is true: + +* the default encoding and the screen encoding are the same + +* the default encoding is `petscii`, the screen encoding is `petscr`, and the platform is 6502-based + +* the default encoding is `petsciijp`, the screen encoding is `petscrjp`, and the platform is 6502-based + +* the default encoding is `atascii`, the screen encoding is `atasciiscr`, and the platform is 6502-based + +#### byte from_screencode(byte) + +Converts a byte from the screen encoding to the default encoding. + +If both encodings contain the character `¤`, then `from_screencode('¤'scr) == '¤'`. + +Available only if `to_screencode` is available. + +#### byte petscii_to_petscr(byte) + +Converts a byte from PETSCII to a CBM screencode. +Works also for the variants used on the Japanese version of C64. +Control characters are converted to reverse characters, the same as in the standard quote mode. + +Available only on 6502-based platforms. + +#### byte petscr_to_petscii(byte) + +Converts a byte from a CBM screencode to PETSCII. +Works also for the variants used on the Japanese version of C64. +Reverse characters are interpreted as control characters or as non-reverse characters. + +Available only on 6502-based platforms. + +#### byte atascii_to_atasciiscr(byte) + +Converts a byte from ATASCII to an Atari screencode. +Control characters <$80 are converted to the graphical characters that share the ATASCII code. +Control characters ≥$80 are not supported. + +Available only on 6502-based platforms. + +#### byte atasciiscr_to_atascii(byte) + +Converts a byte from an Atari screencode to ATASCII. +Characters that share their ATASCII code with control characters are supported, +but they need to be escaped with $1B to be printed. +Reverse characters are interpreted as non-reverse characters. + +Available only on 6502-based platforms. 
+ diff --git a/include/encconv.mfk b/include/encconv.mfk new file mode 100644 index 00000000..acb35ea5 --- /dev/null +++ b/include/encconv.mfk @@ -0,0 +1,119 @@ +#if ENCODING_SAME + +#if ARCH_6502 +inline asm byte __byte_identity(byte a) { ? rts } +#elseif ARCH_I80 +#pragma zilog_syntax +inline asm byte __byte_identity(byte a) { ? ret } +#else +inline byte __byte_identity(byte a) = a +#endif + +alias from_screencode = __byte_identity +alias to_screencode = __byte_identity + +#else + +alias from_screencode = __from_screencode +alias to_screencode = __to_screencode + +#endif + + +// conversions for particular encoding pairs: + +#if ARCH_6502 + +asm byte petscii_to_petscr(byte a) { + cmp #$20 + bcc __petscii_to_petscr_ddRev + cmp #$60 + bcc __petscii_to_petscr_dd1 + cmp #$80 + bcc __petscii_to_petscr_dd2 + cmp #$a0 + bcc __petscii_to_petscr_dd3 + cmp #$c0 + bcc __petscii_to_petscr_dd4 + cmp #$ff + bcc __petscii_to_petscr_ddRev + lda #$5e + rts +__petscii_to_petscr_dd2: + and #$5f + rts +__petscii_to_petscr_dd3: + ora #$40 + rts +__petscii_to_petscr_dd4: + eor #$c0 + rts +__petscii_to_petscr_dd1: + and #$3f + rts +__petscii_to_petscr_ddRev: + eor #$80 + rts +} + +asm byte petscr_to_petscii(byte a) { + cmp #$20 + bcs __petscr_to_petscii_40 + ora #$40 + rts +__petscr_to_petscii_40: + cmp #$40 + bcs __petscr_to_petscii_60 + rts +__petscr_to_petscii_60: + cmp #$60 + bcs __petscr_to_petscii_80 + eor #$80 + rts +__petscr_to_petscii_80: + cmp #$80 + bcs __petscr_to_petscii_a0 + eor #$c0 + rts +__petscr_to_petscii_a0: + cmp #$a0 + bcs __petscr_to_petscii_c0 + eor #$80 + rts +__petscr_to_petscii_c0: + eor #$40 + rts +} + +asm byte atascii_to_atasciiscr(byte a) { + and #$7f + cmp #$20 + bcs __atascii_to_atasciiscr_60 + ora #$40 + rts +__atascii_to_atasciiscr_60: + cmp #$60 + bcs __atascii_to_atasciiscr_end + sec + sbc #$20 +__atascii_to_atasciiscr_end: + rts +} + + +asm byte atasciiscr_to_atascii(byte a) { + and #$7f + cmp #$40 + bcs __atasciiscr_to_atascii_60 + clc + adc #$20 + rts 
+__atasciiscr_to_atascii_60: + cmp #$60 + bcs __atasciiscr_to_atascii_end + and #$1f +__atasciiscr_to_atascii_end: + rts +} + +#endif \ No newline at end of file diff --git a/src/main/scala/millfork/Platform.scala b/src/main/scala/millfork/Platform.scala index cea2070c..1a68f4a3 100644 --- a/src/main/scala/millfork/Platform.scala +++ b/src/main/scala/millfork/Platform.scala @@ -245,7 +245,9 @@ object Platform { debugOutputFormatName.toLowerCase(Locale.ROOT), log.fatal(s"Invalid label file format: `$debugOutputFormatName`")) - val builtInFeatures = builtInCpuFeatures(cpu) + val builtInFeatures = builtInCpuFeatures(cpu) ++ Map( + "ENCODING_SAME" -> toLong(codec.name == srcCodec.name) + ) import scala.collection.JavaConverters._ val ds = conf.getSection("define") @@ -286,9 +288,10 @@ object Platform { outputStyle) } + @inline + private def toLong(b: Boolean): Long = if (b) 1L else 0L + def builtInCpuFeatures(cpu: Cpu.Value): Map[String, Long] = { - @inline - def toLong(b: Boolean): Long = if (b) 1L else 0L Map[String, Long]( "ARCH_6502" -> toLong(CpuFamily.forType(cpu) == CpuFamily.M6502), "CPU_6502" -> toLong(Set(Cpu.Mos, Cpu.StrictMos, Cpu.Ricoh, Cpu.StrictRicoh)(cpu)), diff --git a/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala b/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala index fd71586f..8aa4a944 100644 --- a/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala +++ b/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala @@ -5,7 +5,7 @@ import java.nio.file.{Files, Paths} import fastparse.core.Parsed.{Failure, Success} import millfork.{CompilationFlag, CompilationOptions, Tarjan} -import millfork.node.{ImportStatement, Position, Program} +import millfork.node.{AliasDefinitionStatement, DeclarationStatement, ImportStatement, Position, Program} import scala.collection.mutable import scala.collection.convert.ImplicitConversionsToScala._ @@ -23,6 +23,20 @@ abstract class AbstractSourceLoadingQueue[T](val 
initialFilenames: List[String], def enqueueStandardModules(): Unit + def pseudoModules: List[DeclarationStatement] = { + val encodingConversionAliases = (options.platform.defaultCodec.name, options.platform.screenCodec.name) match { + case (TextCodec.Petscii.name, TextCodec.CbmScreencodes.name) | + (TextCodec.PetsciiJp.name, TextCodec.CbmScreencodesJp.name)=> + List(AliasDefinitionStatement("__from_screencode", "petscr_to_petscii", important = false), + AliasDefinitionStatement("__to_screencode", "petscii_to_petscr", important = false)) + case (TextCodec.Atascii.name, TextCodec.AtasciiScreencodes.name)=> + List(AliasDefinitionStatement("__from_screencode", "atasciiscr_to_atascii", important = false), + AliasDefinitionStatement("__to_screencode", "atascii_to_atasciiscr", important = false)) + case _ => Nil + } + encodingConversionAliases + } + def run(): Program = { for { initialFilename <- initialFilenames @@ -90,6 +104,7 @@ abstract class AbstractSourceLoadingQueue[T](val initialFilenames: List[String], } val parser = createParser(shortFileName, src, parentDir, featureConstants, pragmas.keySet) options.log.addSource(shortFileName, src.linesIterator.toIndexedSeq) + parsedModules.synchronized { parsedModules.put("pseudomodule\u0000", Program(pseudoModules)) } parser.toAst match { case Success(prog, _) => parsedModules.synchronized {