1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-01 06:29:53 +00:00

Added encconv module.

This commit is contained in:
Karol Stasiak 2019-09-04 01:14:14 +02:00
parent f3b5fe715c
commit e449b67a4a
8 changed files with 225 additions and 4 deletions

View File

@ -8,6 +8,8 @@
* Allowed passing non-decimal numbers to the `-D` option.
* Added the `encconv` module.
* Added `nullchar` constant as the null terminator for strings and `NULLCHAR` feature to define its value.
* Added `vectrex` text encoding.

View File

@ -22,6 +22,8 @@
* [Literals](lang/literals.md)
* [Predefined constants](lang/predefined_constants.md)
* [List of text encodings and escape sequences](lang/text.md)
* [Operators reference](lang/operators.md)
@ -44,6 +46,8 @@
* [Modules for reading input devices](stdlib/input.md)
* [`encconv` module](stdlib/encconv.md)
* [Other cross-platform modules (`keyboard`, `err`, `random`)](stdlib/other.md)
* [Definitions available on only some platforms](stdlib/frequent.md)

View File

@ -46,6 +46,8 @@
* [Modules for reading input devices](stdlib/input.md)
* [`encconv` module](stdlib/encconv.md)
* [Other cross-platform modules (`keyboard`, `err`, `random`)](stdlib/other.md)
* [Definitions available on only some platforms](stdlib/frequent.md)

View File

@ -54,6 +54,8 @@ The following features are defined based on the chosen CPU and compilation optio
`CPUFEATURE_8080`, `CPUFEATURE_8085`, `CPUFEATURE_GAMEBOY`, `CPUFEATURE_Z80`,
`CPUFEATURE_6502_ILLEGALS`, `CPUFEATURE_8085_ILLEGALS`, `CPUFEATURE_Z80_ILLEGALS` 1 if given instruction subset is enabled, 0 otherwise
* `ENCODING_SAME` - 1 if the encodings `default` and `scr` are the same, 0 otherwise.
* `INIT_RW_MEMORY` 1 if the option `ram_init_segment` is defined, 0 otherwise.
See [the ROM vs RAM guide](../api/rom-vs-ram.md) for more information.

74
docs/stdlib/encconv.md Normal file
View File

@ -0,0 +1,74 @@
[< back to index](../doc_index.md)
## encconv
The `encconv` module provides functions for character set conversions.
All the functions are defined only for the characters that are valid in both input and output encoding.
Unsupported characters may give arbitrary results.
The unsupported characters are not guaranteed to roundtrip.
Some encodings (e.g. PETSCII) allow for multiple encodings of the same character.
On input, all encodings of a character are treated as equivalent.
On output, the canonical encoding is preferred.
Characters that are present in the input encoding,
but are encoded as multiple bytes in the output encoding, are not supported.
#### byte to_screencode(byte)
Converts a byte from the default encoding to the screen encoding.
If both encodings contain the character `¤`, then `to_screencode('¤') == '¤'scr`.
Available only if one of the following is true:
* the default encoding and the screen encoding are the same
* the default encoding is `petscii`, the screen encoding is `petscr`, and the platform is 6502-based
* the default encoding is `petsciijp`, the screen encoding is `petscrjp`, and the platform is 6502-based
* the default encoding is `atascii`, the screen encoding is `atasciiscr`, and the platform is 6502-based
#### byte from_screencode(byte)
Converts a byte from the screen encoding to the default encoding.
If both encodings contain the character `¤`, then `from_screencode('¤'scr) == '¤'`.
Available only if `to_screencode` is available.
#### byte petscii_to_petscr(byte)
Converts a byte from PETSCII to a CBM screencode.
Works also for the variants used on the Japanese version of C64.
Control characters are converted to reverse characters, the same as in the standard quote mode.
Available only on 6502-based platforms.
#### byte petscr_to_petscii(byte)
Converts a byte from a CBM screencode to PETSCII.
Works also for the variants used on the Japanese version of C64.
Reverse characters are interpreted as control characters or as non-reverse characters.
Available only on 6502-based platforms.
#### byte atascii_to_atasciiscr(byte)
Converts a byte from ATASCII to an Atari screencode.
Control characters <$80 are converted to the graphical characters that share the ATASCII code.
Control characters ≥$80 are not supported.
Available only on 6502-based platforms.
#### byte atasciiscr_to_atascii(byte)
Converts a byte from an Atari screencode to ATASCII.
Characters that share their ATASCII code with control characters are supported,
but they need to be escaped with $1B to be printed.
Reverse characters are interpreted as non-reverse characters.
Available only on 6502-based platforms.

119
include/encconv.mfk Normal file
View File

@ -0,0 +1,119 @@
// Generic conversions between the default encoding and the screen encoding.
//
// When ENCODING_SAME is set, from_screencode and to_screencode both alias
// __byte_identity, which hands its argument back unchanged.
// Otherwise they alias __from_screencode and __to_screencode, which are not
// defined in this file and have to be supplied from outside it.
#if ENCODING_SAME
#if ARCH_6502
// The body is assembly, so the function carries the `asm` modifier,
// like the other assembly routines in this file.
inline asm byte __byte_identity(byte a) { ? rts }
#elseif ARCH_I80
#pragma zilog_syntax
// The body is assembly, so the function carries the `asm` modifier,
// like the other assembly routines in this file.
inline asm byte __byte_identity(byte a) { ? ret }
#else
inline byte __byte_identity(byte a) = a
#endif
alias from_screencode = __byte_identity
alias to_screencode = __byte_identity
#else
alias from_screencode = __from_screencode
alias to_screencode = __to_screencode
#endif
// conversions for particular encoding pairs:
#if ARCH_6502
// Converts a PETSCII code to a CBM screencode, working on the value in the accumulator.
// The input is classified by a chain of range checks; each range gets one bit operation:
//   $00-$1F: eor #$80
//   $20-$5F: and #$3f
//   $60-$7F: and #$5f
//   $80-$9F: ora #$40
//   $A0-$BF: eor #$c0
//   $C0-$FE: eor #$80
//   $FF:     replaced with $5e
asm byte petscii_to_petscr(byte a) {
cmp #$20
bcc __petscii_to_petscr_ddRev
cmp #$60
bcc __petscii_to_petscr_dd1
cmp #$80
bcc __petscii_to_petscr_dd2
cmp #$a0
bcc __petscii_to_petscr_dd3
cmp #$c0
bcc __petscii_to_petscr_dd4
cmp #$ff
bcc __petscii_to_petscr_ddRev
lda #$5e
rts
__petscii_to_petscr_dd2:
and #$5f
rts
__petscii_to_petscr_dd3:
ora #$40
rts
__petscii_to_petscr_dd4:
eor #$c0
rts
__petscii_to_petscr_dd1:
and #$3f
rts
__petscii_to_petscr_ddRev:
eor #$80
rts
}
// Converts a CBM screencode to a PETSCII code, working on the value in the accumulator.
// The input is classified in $20-sized steps; each range gets one bit operation:
//   $00-$1F: ora #$40
//   $20-$3F: returned unchanged
//   $40-$5F: eor #$80
//   $60-$7F: eor #$c0
//   $80-$9F: eor #$80
//   $A0-$FF: eor #$40
asm byte petscr_to_petscii(byte a) {
cmp #$20
bcs __petscr_to_petscii_40
ora #$40
rts
__petscr_to_petscii_40:
cmp #$40
bcs __petscr_to_petscii_60
rts
__petscr_to_petscii_60:
cmp #$60
bcs __petscr_to_petscii_80
eor #$80
rts
__petscr_to_petscii_80:
cmp #$80
bcs __petscr_to_petscii_a0
eor #$c0
rts
__petscr_to_petscii_a0:
cmp #$a0
bcs __petscr_to_petscii_c0
eor #$80
rts
__petscr_to_petscii_c0:
eor #$40
rts
}
// Converts an ATASCII code to an Atari screencode, working on the value in the accumulator.
// Bit 7 is cleared first (and #$7f), so the result is always below $80. Then:
//   $00-$1F: ora #$40
//   $20-$5F: $20 is subtracted
//   $60-$7F: returned unchanged
asm byte atascii_to_atasciiscr(byte a) {
and #$7f
cmp #$20
bcs __atascii_to_atasciiscr_60
ora #$40
rts
__atascii_to_atasciiscr_60:
cmp #$60
bcs __atascii_to_atasciiscr_end
sec
sbc #$20
__atascii_to_atasciiscr_end:
rts
}
// Converts an Atari screencode to an ATASCII code, working on the value in the accumulator.
// Bit 7 is cleared first (and #$7f), so the result is always below $80. Then:
//   $00-$3F: $20 is added
//   $40-$5F: and #$1f
//   $60-$7F: returned unchanged
//
// The labels carry this function's own name, as in the other routines of this file,
// so that they do not repeat the label names used inside atascii_to_atasciiscr.
asm byte atasciiscr_to_atascii(byte a) {
    and #$7f
    cmp #$40
    bcs __atasciiscr_to_atascii_60
    clc
    adc #$20
    rts
__atasciiscr_to_atascii_60:
    cmp #$60
    bcs __atasciiscr_to_atascii_end
    and #$1f
__atasciiscr_to_atascii_end:
    rts
}
#endif

View File

@ -245,7 +245,9 @@ object Platform {
debugOutputFormatName.toLowerCase(Locale.ROOT),
log.fatal(s"Invalid label file format: `$debugOutputFormatName`"))
val builtInFeatures = builtInCpuFeatures(cpu)
val builtInFeatures = builtInCpuFeatures(cpu) ++ Map(
"ENCODING_SAME" -> toLong(codec.name == srcCodec.name)
)
import scala.collection.JavaConverters._
val ds = conf.getSection("define")
@ -286,9 +288,10 @@ object Platform {
outputStyle)
}
/** Turns a boolean into the numeric form stored in feature maps: `1L` for true, `0L` for false. */
@inline
private def toLong(b: Boolean): Long = b match {
  case true => 1L
  case false => 0L
}
def builtInCpuFeatures(cpu: Cpu.Value): Map[String, Long] = {
@inline
def toLong(b: Boolean): Long = if (b) 1L else 0L
Map[String, Long](
"ARCH_6502" -> toLong(CpuFamily.forType(cpu) == CpuFamily.M6502),
"CPU_6502" -> toLong(Set(Cpu.Mos, Cpu.StrictMos, Cpu.Ricoh, Cpu.StrictRicoh)(cpu)),

View File

@ -5,7 +5,7 @@ import java.nio.file.{Files, Paths}
import fastparse.core.Parsed.{Failure, Success}
import millfork.{CompilationFlag, CompilationOptions, Tarjan}
import millfork.node.{ImportStatement, Position, Program}
import millfork.node.{AliasDefinitionStatement, DeclarationStatement, ImportStatement, Position, Program}
import scala.collection.mutable
import scala.collection.convert.ImplicitConversionsToScala._
@ -23,6 +23,20 @@ abstract class AbstractSourceLoadingQueue[T](val initialFilenames: List[String],
def enqueueStandardModules(): Unit
/**
 * Builds the declarations that make up the synthetic pseudo-module.
 *
 * The names of the platform's default codec and screen codec are compared against the
 * supported pairs. For a supported pair the result holds the `__from_screencode` and
 * `__to_screencode` aliases pointing at the matching conversion routines; for any other
 * pair the result is empty.
 */
def pseudoModules: List[DeclarationStatement] = {
  // Builds the alias pair for one supported codec combination.
  def conversionAliases(fromScreen: String, toScreen: String): List[DeclarationStatement] =
    List(
      AliasDefinitionStatement("__from_screencode", fromScreen, important = false),
      AliasDefinitionStatement("__to_screencode", toScreen, important = false))

  val defaultName = options.platform.defaultCodec.name
  val screenName = options.platform.screenCodec.name

  if ((defaultName == TextCodec.Petscii.name && screenName == TextCodec.CbmScreencodes.name) ||
      (defaultName == TextCodec.PetsciiJp.name && screenName == TextCodec.CbmScreencodesJp.name)) {
    conversionAliases("petscr_to_petscii", "petscii_to_petscr")
  } else if (defaultName == TextCodec.Atascii.name && screenName == TextCodec.AtasciiScreencodes.name) {
    conversionAliases("atasciiscr_to_atascii", "atascii_to_atasciiscr")
  } else {
    Nil
  }
}
def run(): Program = {
for {
initialFilename <- initialFilenames
@ -90,6 +104,7 @@ abstract class AbstractSourceLoadingQueue[T](val initialFilenames: List[String],
}
val parser = createParser(shortFileName, src, parentDir, featureConstants, pragmas.keySet)
options.log.addSource(shortFileName, src.linesIterator.toIndexedSeq)
parsedModules.put("pseudomodule\u0000", Program(pseudoModules))
parser.toAst match {
case Success(prog, _) =>
parsedModules.synchronized {