From 7f9bd18bddd218efac76a67327e0e86714aa81c0 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Fri, 1 May 2020 01:31:54 +0200 Subject: [PATCH] The big text encoding overhaul --- .gitignore | 3 - CHANGELOG.md | 24 + docs/doc_index.md | 2 + docs/lang/custom-encoding.md | 52 ++ docs/lang/text.md | 123 +++- include/encoding/amstrad_cpm.tbl | 1 + include/encoding/apple2.tbl | 13 + include/encoding/apple2c.tbl | 15 + include/encoding/apple2e.tbl | 14 + include/encoding/apple2gs.tbl | 12 + include/encoding/ascii.tbl | 15 + include/encoding/atari.tbl | 1 + include/encoding/atariscr.tbl | 1 + include/encoding/atascii.tbl | 23 + include/encoding/atasciiscr.tbl | 19 + include/encoding/bbc.tbl | 19 + include/encoding/cbmscr.tbl | 21 + include/encoding/cbmscrjp.tbl | 56 ++ include/encoding/cp1250.tbl | 32 + include/encoding/cp1251.tbl | 28 + include/encoding/cp1252.tbl | 34 + include/encoding/cp437.tbl | 30 + include/encoding/cp850.tbl | 31 + include/encoding/cp851.tbl | 30 + include/encoding/cp852.tbl | 28 + include/encoding/cp855.tbl | 25 + include/encoding/cp858.tbl | 32 + include/encoding/cp866.tbl | 24 + include/encoding/cpc_da.tbl | 27 + include/encoding/cpc_en.tbl | 21 + include/encoding/cpc_es.tbl | 21 + include/encoding/cpc_fr.tbl | 22 + include/encoding/ebcdic.tbl | 26 + include/encoding/galaksija.tbl | 16 + include/encoding/iso1.tbl | 1 + include/encoding/iso10.tbl | 1 + include/encoding/iso13.tbl | 1 + include/encoding/iso14.tbl | 1 + include/encoding/iso15.tbl | 1 + include/encoding/iso16.tbl | 1 + include/encoding/iso2.tbl | 1 + include/encoding/iso3.tbl | 1 + include/encoding/iso4.tbl | 1 + include/encoding/iso5.tbl | 1 + include/encoding/iso7.tbl | 1 + include/encoding/iso8859_1.tbl | 29 + include/encoding/iso8859_10.tbl | 27 + include/encoding/iso8859_13.tbl | 30 + include/encoding/iso8859_14.tbl | 29 + include/encoding/iso8859_15.tbl | 30 + include/encoding/iso8859_16.tbl | 29 + include/encoding/iso8859_2.tbl | 25 + include/encoding/iso8859_3.tbl | 29 + 
include/encoding/iso8859_4.tbl | 25 + include/encoding/iso8859_5.tbl | 24 + include/encoding/iso8859_7.tbl | 31 + include/encoding/iso8859_9.tbl | 31 + include/encoding/iso9.tbl | 1 + include/encoding/iso_de.tbl | 22 + include/encoding/iso_dk.tbl | 1 + include/encoding/iso_fi.tbl | 1 + include/encoding/iso_no.tbl | 33 + include/encoding/iso_se.tbl | 29 + include/encoding/iso_yu.tbl | 22 + include/encoding/jis.tbl | 49 ++ include/encoding/jisx.tbl | 1 + include/encoding/kamenicky.tbl | 27 + include/encoding/koi7n2.tbl | 19 + include/encoding/latin0.tbl | 1 + include/encoding/latin1.tbl | 1 + include/encoding/latin10.tbl | 2 + include/encoding/latin2.tbl | 2 + include/encoding/latin3.tbl | 2 + include/encoding/latin4.tbl | 2 + include/encoding/latin5.tbl | 2 + include/encoding/latin6.tbl | 2 + include/encoding/latin7.tbl | 2 + include/encoding/latin8.tbl | 2 + include/encoding/latin9.tbl | 1 + include/encoding/mazovia.tbl | 29 + include/encoding/msx_br.tbl | 50 ++ include/encoding/msx_de.tbl | 1 + include/encoding/msx_es.tbl | 1 + include/encoding/msx_fr.tbl | 1 + include/encoding/msx_intl.tbl | 49 ++ include/encoding/msx_jp.tbl | 77 ++ include/encoding/msx_ru.tbl | 41 ++ include/encoding/msx_uk.tbl | 1 + include/encoding/msx_us.tbl | 1 + include/encoding/oldpet.tbl | 1 + include/encoding/oldpetscii.tbl | 26 + include/encoding/origpet.tbl | 1 + include/encoding/origpetscii.tbl | 26 + include/encoding/pcw.tbl | 57 ++ include/encoding/pet.tbl | 1 + include/encoding/petjp.tbl | 1 + include/encoding/petscii.tbl | 35 + include/encoding/petsciijp.tbl | 69 ++ include/encoding/petscr.tbl | 1 + include/encoding/petscrjp.tbl | 1 + include/encoding/pokemon1de.tbl | 1 + include/encoding/pokemon1en.tbl | 50 ++ include/encoding/pokemon1es.tbl | 30 + include/encoding/pokemon1fr.tbl | 46 ++ include/encoding/pokemon1it.tbl | 1 + include/encoding/pokemon1jp.tbl | 26 + include/encoding/short_koi.tbl | 1 + include/encoding/sinclair.tbl | 39 ++ include/encoding/utf16be.tbl | 2 + 
include/encoding/utf16le.tbl | 2 + include/encoding/utf8.tbl | 2 + include/encoding/vectrex.tbl | 14 + include/encoding/zx80.tbl | 18 + include/encoding/zx81.tbl | 18 + include/platform/cpc464.ini | 2 +- mkdocs.yml | 1 + .../scala/millfork/CompilationOptions.scala | 2 + src/main/scala/millfork/Main.scala | 7 +- src/main/scala/millfork/Platform.scala | 21 +- .../parser/AbstractSourceLoadingQueue.scala | 7 +- src/main/scala/millfork/parser/MfParser.scala | 2 +- .../scala/millfork/parser/TextCodec.scala | 659 +----------------- .../millfork/parser/TextCodecRepository.scala | 234 +++++++ .../scala/millfork/test/ZLineSizeSuite.scala | 2 +- .../test/auxilary/EncodingSanitySuite.scala | 50 ++ .../scala/millfork/test/emu/EmuI86Run.scala | 4 +- .../scala/millfork/test/emu/EmuM6809Run.scala | 4 +- .../scala/millfork/test/emu/EmuPlatform.scala | 4 +- src/test/scala/millfork/test/emu/EmuRun.scala | 8 +- .../scala/millfork/test/emu/EmuZ80Run.scala | 4 +- .../millfork/test/emu/ShouldNotCompile.scala | 2 +- .../millfork/test/emu/ShouldNotParse.scala | 2 +- 132 files changed, 2453 insertions(+), 697 deletions(-) create mode 100644 docs/lang/custom-encoding.md create mode 100644 include/encoding/amstrad_cpm.tbl create mode 100644 include/encoding/apple2.tbl create mode 100644 include/encoding/apple2c.tbl create mode 100644 include/encoding/apple2e.tbl create mode 100644 include/encoding/apple2gs.tbl create mode 100644 include/encoding/ascii.tbl create mode 100644 include/encoding/atari.tbl create mode 100644 include/encoding/atariscr.tbl create mode 100644 include/encoding/atascii.tbl create mode 100644 include/encoding/atasciiscr.tbl create mode 100644 include/encoding/bbc.tbl create mode 100644 include/encoding/cbmscr.tbl create mode 100644 include/encoding/cbmscrjp.tbl create mode 100644 include/encoding/cp1250.tbl create mode 100644 include/encoding/cp1251.tbl create mode 100644 include/encoding/cp1252.tbl create mode 100644 include/encoding/cp437.tbl create mode 100644 
include/encoding/cp850.tbl create mode 100644 include/encoding/cp851.tbl create mode 100644 include/encoding/cp852.tbl create mode 100644 include/encoding/cp855.tbl create mode 100644 include/encoding/cp858.tbl create mode 100644 include/encoding/cp866.tbl create mode 100644 include/encoding/cpc_da.tbl create mode 100644 include/encoding/cpc_en.tbl create mode 100644 include/encoding/cpc_es.tbl create mode 100644 include/encoding/cpc_fr.tbl create mode 100644 include/encoding/ebcdic.tbl create mode 100644 include/encoding/galaksija.tbl create mode 100644 include/encoding/iso1.tbl create mode 100644 include/encoding/iso10.tbl create mode 100644 include/encoding/iso13.tbl create mode 100644 include/encoding/iso14.tbl create mode 100644 include/encoding/iso15.tbl create mode 100644 include/encoding/iso16.tbl create mode 100644 include/encoding/iso2.tbl create mode 100644 include/encoding/iso3.tbl create mode 100644 include/encoding/iso4.tbl create mode 100644 include/encoding/iso5.tbl create mode 100644 include/encoding/iso7.tbl create mode 100644 include/encoding/iso8859_1.tbl create mode 100644 include/encoding/iso8859_10.tbl create mode 100644 include/encoding/iso8859_13.tbl create mode 100644 include/encoding/iso8859_14.tbl create mode 100644 include/encoding/iso8859_15.tbl create mode 100644 include/encoding/iso8859_16.tbl create mode 100644 include/encoding/iso8859_2.tbl create mode 100644 include/encoding/iso8859_3.tbl create mode 100644 include/encoding/iso8859_4.tbl create mode 100644 include/encoding/iso8859_5.tbl create mode 100644 include/encoding/iso8859_7.tbl create mode 100644 include/encoding/iso8859_9.tbl create mode 100644 include/encoding/iso9.tbl create mode 100644 include/encoding/iso_de.tbl create mode 100644 include/encoding/iso_dk.tbl create mode 100644 include/encoding/iso_fi.tbl create mode 100644 include/encoding/iso_no.tbl create mode 100644 include/encoding/iso_se.tbl create mode 100644 include/encoding/iso_yu.tbl create mode 100644 
include/encoding/jis.tbl create mode 100644 include/encoding/jisx.tbl create mode 100644 include/encoding/kamenicky.tbl create mode 100644 include/encoding/koi7n2.tbl create mode 100644 include/encoding/latin0.tbl create mode 100644 include/encoding/latin1.tbl create mode 100644 include/encoding/latin10.tbl create mode 100644 include/encoding/latin2.tbl create mode 100644 include/encoding/latin3.tbl create mode 100644 include/encoding/latin4.tbl create mode 100644 include/encoding/latin5.tbl create mode 100644 include/encoding/latin6.tbl create mode 100644 include/encoding/latin7.tbl create mode 100644 include/encoding/latin8.tbl create mode 100644 include/encoding/latin9.tbl create mode 100644 include/encoding/mazovia.tbl create mode 100644 include/encoding/msx_br.tbl create mode 100644 include/encoding/msx_de.tbl create mode 100644 include/encoding/msx_es.tbl create mode 100644 include/encoding/msx_fr.tbl create mode 100644 include/encoding/msx_intl.tbl create mode 100644 include/encoding/msx_jp.tbl create mode 100644 include/encoding/msx_ru.tbl create mode 100644 include/encoding/msx_uk.tbl create mode 100644 include/encoding/msx_us.tbl create mode 100644 include/encoding/oldpet.tbl create mode 100644 include/encoding/oldpetscii.tbl create mode 100644 include/encoding/origpet.tbl create mode 100644 include/encoding/origpetscii.tbl create mode 100644 include/encoding/pcw.tbl create mode 100644 include/encoding/pet.tbl create mode 100644 include/encoding/petjp.tbl create mode 100644 include/encoding/petscii.tbl create mode 100644 include/encoding/petsciijp.tbl create mode 100644 include/encoding/petscr.tbl create mode 100644 include/encoding/petscrjp.tbl create mode 100644 include/encoding/pokemon1de.tbl create mode 100644 include/encoding/pokemon1en.tbl create mode 100644 include/encoding/pokemon1es.tbl create mode 100644 include/encoding/pokemon1fr.tbl create mode 100644 include/encoding/pokemon1it.tbl create mode 100644 include/encoding/pokemon1jp.tbl create 
mode 100644 include/encoding/short_koi.tbl create mode 100644 include/encoding/sinclair.tbl create mode 100644 include/encoding/utf16be.tbl create mode 100644 include/encoding/utf16le.tbl create mode 100644 include/encoding/utf8.tbl create mode 100644 include/encoding/vectrex.tbl create mode 100644 include/encoding/zx80.tbl create mode 100644 include/encoding/zx81.tbl create mode 100644 src/main/scala/millfork/parser/TextCodecRepository.scala create mode 100644 src/test/scala/millfork/test/auxilary/EncodingSanitySuite.scala diff --git a/.gitignore b/.gitignore index 3de95e29..1e086ff9 100644 --- a/.gitignore +++ b/.gitignore @@ -9,9 +9,6 @@ src/test/scala/experiments/ # doesn't work yet examples/lunix/ -# may become a feature in the future -*.tbl - # hidden files *.~ diff --git a/CHANGELOG.md b/CHANGELOG.md index 06dab0c0..05f86cd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,30 @@ ## Current version +* Allowed defining custom text encodings. +**Potentially breaking change!** +There are no built-in encodings now, the include path needs to contain the necessary encodings. + +* Fixed encodings: +`apple2`, `atasciiscr`, `iso_de`, `iso_no`, `iso_se`, +`koi7n2`, `msx_jp`, +`oldpet`, `origpet`, `petscii`, `petsciijp`, `petscr`, `petscrjp`, +`zx80`. + +* Added encodings: +`apple2c`, `apple2e`, `apple2gs`, +`cpc_da`, `cpc_en`, `cpc_es`, `cpc_fr`, +`cp437`, `cp850`, `cp851`, `cp852`, `cp855`, `cp858`, `cp866`, +`cp1250`, `cp1251`, `cp1252`, +`ebcdic`, +`galaksija`, +`iso8859_1`, `iso8859_2`, `iso8859_3`, `iso8859_4`, `iso8859_5`, +`iso8859_7`, `iso8859_9`, `iso8859_10`, `iso8859_13`, `iso8859_14`, `iso8859_16`, +`kamenicky`,`mazovia`, `pcw`, +`pokemon1en`, `pokemon1es`, `pokemon1fr`, `pokemon1jp`. + +* **Potentially breaking change!** Changed default encoding for CPC to `cpc_en`. + * Allow importing modules from subdirectories. * Allow placing platform definitions in a dedicated subdirectory. 
diff --git a/docs/doc_index.md b/docs/doc_index.md index 18331ce5..7eef2cfb 100644 --- a/docs/doc_index.md +++ b/docs/doc_index.md @@ -26,6 +26,8 @@ * [List of text encodings and escape sequences](lang/text.md) +* [Defining custom encodings](lang/custom-encoding.md) + * [Operators reference](lang/operators.md) * [Functions](lang/functions.md) diff --git a/docs/lang/custom-encoding.md b/docs/lang/custom-encoding.md new file mode 100644 index 00000000..5412e7f6 --- /dev/null +++ b/docs/lang/custom-encoding.md @@ -0,0 +1,52 @@ +[< back to index](../doc_index.md) + +### Defining custom encodings + +Every encoding is defined in a `.tbl` file with an appropriate name. +The file is looked up in the directories on the include path, first directly, then in the `encoding` subdirectory. + +The file is a UTF-8 text file, with each line having a specific meaning. +In the specifications below, `<>` are not meant to be taken literally: + +* lines starting with `#`, `;` or `//` are comments. + +* `ALIAS=<another encoding name>` defines this encoding to be an alias for another encoding. +No other lines are allowed in the file. + +* `NAME=<name>` defines the name for this encoding. Required. + +* `BUILTIN=<internal name>` defines this encoding to be a UTF-based encoding. +`<internal name>` may be one of `UTF-8`, `UTF-16LE`, `UTF-16BE`. +If this directive is present, the only other allowed directive in the file is the `NAME` directive. + +* `EOT=<xx>` where `<xx>` are two hex digits, defines the string terminator byte. +Required, unless `BUILTIN` is present. +There have to be two digits, `EOT=0` is invalid. + +* lines like `<xx>=<c>` where `<xx>` are two hex digits +and `<c>` is either a **non-whitespace** character or a **BMP** Unicode codepoint written as `U+xxxx`, +define the byte `<xx>` to correspond to character `<c>`. +There have to be two digits, `0=@` is invalid. + +* lines like `<xx>-<xx>=<c><c>...<c>` where `<c>` is repeated an appropriate number of times +define characters for multiple byte values. +In lines of this kind, characters cannot be represented as Unicode codepoints.
+ +* lines like `<c>=<xx>`, `<c>=<xx><xx>` etc. +define secondary or alternate characters that are going to be represented as one or more bytes. +There have to be two digits, `@=0` is invalid. +Problematic characters (space, `=`, `#`, `;`) can be written as Unicode codepoints `U+xxxx`. + +* a line like `a-z=<xx>` is equivalent to lines `a=<xx>`, `b=<xx+$01>` all the way to `z=<xx+$19>`. + +* a line like `KATAKANA=>DECOMPOSE` means that katakana characters with dakuten or handakuten +should be split into the base character and the standalone dakuten/handakuten. + +* similarly with `HIRAGANA=>DECOMPOSE`. + +* lines like `{<escape code>}=<xx>`, `{<escape code>}=<xx><xx>` etc. +define escape codes. It's a good practice to define these when possible: +`{q}`, `{apos}`, `{n}`, `{lbrace}`, `{rbrace}`, +`{yen}`, `{pound}`, `{cent}`, `{euro}`, `{copy}`, `{pi}`, +`{nbsp}`, `{shy}`. + diff --git a/docs/lang/text.md b/docs/lang/text.md index 11e444bd..aacf48f3 100644 --- a/docs/lang/text.md +++ b/docs/lang/text.md @@ -1,6 +1,13 @@ [< back to index](../doc_index.md) -# Text encodings ans escape sequences +# Text encodings and escape sequences + +### Defining custom encodings + +Every encoding is defined in a `.tbl` file with an appropriate name. +The file is looked up in the directories on the include path, first directly, then in the `encoding` subdirectory. + +The file format is described in [Defining custom encodings](custom-encoding.md).
### Text encoding list @@ -11,19 +18,25 @@ * `ascii` – standard ASCII -* `pet` or `petscii` – PETSCII (ASCII-like character set used by Commodore machines from VIC-20 onward) +* `petscii` or `pet` – PETSCII (ASCII-like character set used by Commodore machines from VIC-20 onward) -* `petjp` or `petsciijp` – PETSCII as used on Japanese versions of Commodore 64 +* `petsciijp` or `petjp` – PETSCII as used on Japanese versions of Commodore 64 -* `origpet` or `origpetscii` – old PETSCII (Commodore PET with original ROMs) +* `origpetscii` or `origpet` – old PETSCII (Commodore PET with original ROMs) -* `oldpet` or `oldpetscii` – old PETSCII (Commodore PET with newer ROMs) +* `oldpetscii` or `oldpet` – old PETSCII (Commodore PET with newer ROMs) * `cbmscr` or `petscr` – Commodore screencodes * `cbmscrjp` or `petscrjp` – Commodore screencodes as used on Japanese versions of Commodore 64 -* `apple2` – Apple II charset ($A0–$DF) +* `apple2` – original Apple II charset ($A0–$DF) + +* `apple2e` – Apple IIe charset + +* `apple2c` – alternative Apple IIc charset + +* `apple2gs` – Apple IIgs charset * `bbc` – BBC Micro character set @@ -37,15 +50,51 @@ * `iso_de`, `iso_no`, `iso_se`, `iso_yu` – various variants of ISO/IEC-646 -* `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively + * `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively -* `iso15` – ISO 8859-15 +* `iso8859_1`, `iso8859_2`, `iso8859_3`, +`iso8859_4`, `iso8859_5`, `iso8859_7`, +`iso8859_9`, `iso8859_10`, `iso8859_13`, +`iso8859_14`, `iso8859_15`, `iso8859_16` – +ISO 8859-1, ISO 8859-2, ISO 8859-3, +ISO 8859-4, ISO 8859-5, ISO 8859-7, +ISO 8859-9, ISO 8859-10, ISO 8859-13, +ISO 8859-14, ISO 8859-15, ISO 8859-16 -* `latin0`, `latin9`, `iso8859_15` – aliases for `iso15` + * `iso1`, `latin1` – aliases for `iso8859_1` + * `iso2`, `latin2` – aliases for `iso8859_2` + * `iso3`, `latin3` – aliases for `iso8859_3` + * `iso4`, `latin4` – aliases for `iso8859_4` + * `iso5` – alias for `iso8859_5`
+ * `iso7` – alias for `iso8859_7` + * `iso9`, `latin5` – aliases for `iso8859_9` + * `iso10`, `latin6` – aliases for `iso8859_10` + * `iso13`, `latin7` – aliases for `iso8859_13` + * `iso14`, `latin8` – aliases for `iso8859_14` + * `iso15`, `latin9`, `latin0` – aliases for `iso8859_15` + * `iso16`, `latin10` – aliases for `iso8859_16` + +* `cp437`, `cp850`, `cp851`, `cp852`, `cp855`, `cp858`, `cp866` – +DOS codepages 437, 850, 851, 852, 855, 858, 866 + +* `mazovia` – Mazovia encoding + +* `kamenicky` – Kamenický encoding + +* `cp1250`, `cp1251`, `cp1252` – Windows codepages 1250, 1251, 1252 * `msx_intl`, `msx_jp`, `msx_ru`, `msx_br` – MSX character encoding, International, Japanese, Russian and Brazilian respectively -* `msx_us`, `msx_uk`, `msx_fr`, `msx_de` – aliases for `msx_intl` + * `msx_us`, `msx_uk`, `msx_fr`, `msx_de` – aliases for `msx_intl` + +* `cpc_en`, `cpc_fr`, `cpc_es`, `cpc_da` – Amstrad CPC character encoding, English, French, Spanish and Danish respectively + +* `pcw` or `amstrad_cpm` – Amstrad CP/M encoding, the US variant (language 0), as used on PCW machines + +* `pokemon1en`, `pokemon1jp`, `pokemon1es`, `pokemon1fr` – text encodings used in 1st generation Pokémon games, +English, Japanese, Spanish/Italian and French/German respectively + + * `pokemon1it`, `pokemon1de` – aliases for `pokemon1es` and `pokemon1fr` respectively * `atascii` or `atari` – ATASCII as seen on Atari 8-bit computers @@ -55,13 +104,21 @@ * `vectrex` – built-in Vectrex font +* `galaksija` – text encoding used on Galaksija computers + +* `ebcdic` – EBCDIC codepage 037 (partial coverage) + * `utf8` – UTF-8 * `utf16be`, `utf16le` – UTF-16BE and UTF-16LE When programming for Commodore, -use `pet` for strings you're printing using standard I/O routines -and `petscr` for strings you're copying to screen memory directly. +use `petscii` for strings you're printing using standard I/O routines +and `petscr` for strings you're copying to screen memory directly.
+ +When programming for Atari, +use `atascii` for strings you're printing using standard I/O routines +and `atasciiscr` for strings you're copying to screen memory directly. ### Escape sequences @@ -71,8 +128,6 @@ Some escape sequences may expand to multiple characters. For example, in several ##### Available everywhere -* `{q}` – double quote symbol - * `{x00}`–`{xff}` – a character of the given hexadecimal value * `{copyright_year}` – this expands to the current year in digits @@ -89,12 +144,15 @@ The exact value of `{nullchar}` is encoding-dependent: * in the `zx81` encoding it's `{x0b}`, * in the `petscr` and `petscrjp` encodings it's `{xe0}`, * in the `atasciiscr` encoding it's `{xdb}`, + * in the `pokemon1*` encodings it's `{x50}`, * in the `utf16be` and `utf16le` encodings it's exceptionally two bytes: `{x00}{x00}` * in other encodings it's `{x00}` (this may be a subject to change in future versions). ##### Available only in some encodings -* `{apos}` – apostrophe/single quote (available everywhere except for `zx80` and `zx81`) +* `{apos}` – apostrophe/single quote (available everywhere except for `zx80`, `zx81` and `galaksija`) + +* `{q}` – double quote symbol (available everywhere except for `pokemon1*` encodings) * `{n}` – new line @@ -105,19 +163,25 @@ The exact value of `{nullchar}` is encoding-dependent: * `{up}`, `{down}`, `{left}`, `{right}` – control codes for moving the cursor * `{white}`, `{black}`, `{red}`, `{green}`, `{blue}`, `{cyan}`, `{yellow}`, `{purple}` – -control codes for changing the text color +control codes for changing the text color (`petscii`, `petsciijp`, `sinclair` only) * `{bgwhite}`, `{bgblack}`, `{bgred}`, `{bggreen}`, `{bgblue}`, `{bgcyan}`, `{bgyellow}`, `{bgpurple}` – -control codes for changing the text background color +control codes for changing the text background color (`sinclair` only) * `{reverse}`, `{reverseoff}` – inverted mode on/off * `{yen}`, `{pound}`, `{cent}`, `{euro}`, `{copy}` – yen symbol, pound symbol, 
cent symbol, euro symbol, copyright symbol +* `{nbsp}`, `{shy}` – non-breaking space, soft hyphen + +* `{pi}` – letter π + * `{u0000}`–`{u1fffff}` – Unicode codepoint (available in UTF encodings only) ##### Character availability +For ISO/DOS/Windows/UTF encodings, consult external sources. + Encoding | lowercase letters | backslash | currencies | intl | card suits ---------|-------------------|-----------|------------|------|----------- `pet`, | yes¹ | no | £ | none | yes¹ @@ -132,14 +196,20 @@ Encoding | lowercase letters | backslash | currencies | intl | card suits `atascii` | yes | yes | | none | yes `atasciiscr` | yes | yes | | none | yes `jis` | yes | no | ¥ | both kana | no -`iso15` | yes | yes | €¢£¥ | Western | no `msx_intl`,`msx_br` | yes | yes | ¢£¥ | Western | yes `msx_jp` | yes | no | ¥ | katakana | yes `msx_ru` | yes | yes | | Russian⁴ | yes `koi7n2` | no | yes | | Russian⁵ | no +`cpc_en` | yes | yes | £ | none | yes +`cpc_es` | yes | yes | | Spanish⁶ | yes +`cpc_fr` | yes | no | £ | French⁷ | yes +`cpc_da` | yes | no | £ | Nor/Dan. | yes `vectrex` | no | yes | | none | no -`utf*` | yes | yes | all | all | yes -all the rest | yes | yes | | none | no +`pokemon1jp` | no | no | | both kana | no +`pokemon1en` | yes | no | | none | no +`pokemon1fr` | yes | no | | Ger/Fre. | no +`pokemon1es` | yes | no | | Spa/Ita. | no +`galaksija` | no | no | | Yugoslav⁸ | no 1. `pet`, `origpet` and `petscr` cannot display card suit symbols and lowercase letters at the same time. Card suit symbols are only available in graphics mode, @@ -155,6 +225,12 @@ Card suit symbols are only available in graphics mode, in which katakana is disp 5. Only uppercase. Letters **Ё** and **Ъ** are not available. +6. No accented vowels. + +7. Some accented vowels are not available. + +8. Letter **Đ** is not available. + If the encoding does not support lowercase letters (e.g. 
`apple2`, `petjp`, `petscrjp`, `koi7n2`, `vectrex`), then text and character literals containing lowercase letters are automatically converted to uppercase. Only unaccented Latin and Cyrillic letters will be converted as such. @@ -163,6 +239,8 @@ To detect if your default encoding does not support lowercase letters, test `'A' ##### Escape sequence availability +The table below may be incomplete. + Encoding | new line | braces | backspace | cursor movement | text colour | reverse | background colour ---------|----------|--------|-----------|-----------------|-------------|---------|------------------ `pet`,`petjp` | yes | no | no | yes | yes | yes | no @@ -172,8 +250,11 @@ Encoding | new line | braces | backspace | cursor movement | text colour | rever `sinclair` | yes | yes | no | yes | yes | yes | yes `zx80`,`zx81` | yes | no | yes | yes | no | no | no `ascii`, `iso_*` | yes | yes | yes | no | no | no | no -`iso15` | yes | yes | yes | no | no | no | no +`iso8859_*`, `cp*` | yes | yes | yes | no | no | no | no `apple2` | no | yes | no | no | no | no | no +`apple2` | no | no | no | no | no | no | no +`apple2e` | no | yes | no | no | no | no | no +`apple2gs` | no | yes | no | no | no | no | no `atascii` | yes | no | yes | yes | no | no | no `atasciiscr` | no | no | no | no | no | no | no `msx_*` | yes | yes | yes | yes | no | no | no diff --git a/include/encoding/amstrad_cpm.tbl b/include/encoding/amstrad_cpm.tbl new file mode 100644 index 00000000..b67d3e8b --- /dev/null +++ b/include/encoding/amstrad_cpm.tbl @@ -0,0 +1 @@ +ALIAS=pcw diff --git a/include/encoding/apple2.tbl b/include/encoding/apple2.tbl new file mode 100644 index 00000000..2e446324 --- /dev/null +++ b/include/encoding/apple2.tbl @@ -0,0 +1,13 @@ +NAME=APPLE-II +EOT=00 + +A0=U+0020 +A1-BF=!"#$%&'()*+,-./0123456789:;<=>?
+C0-DF=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ + +a-z=C1 + +{q}=02 +{apos}=07 +{nbsp}=40 + diff --git a/include/encoding/apple2c.tbl b/include/encoding/apple2c.tbl new file mode 100644 index 00000000..5bfb0798 --- /dev/null +++ b/include/encoding/apple2c.tbl @@ -0,0 +1,15 @@ +NAME=APPLE-IIc +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[£]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{\}~ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/apple2e.tbl b/include/encoding/apple2e.tbl new file mode 100644 index 00000000..99baaf18 --- /dev/null +++ b/include/encoding/apple2e.tbl @@ -0,0 +1,14 @@ +NAME=APPLE-IIe +EOT=7F +# TODO + +00=U+0020 +01-1f=!"#$%&'()*+,-./0123456789:;<=>? +20-3f=@abcdefghijklmnopqrstuvwxyz[\]^_ +60-7e=πABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~ + +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{pi}=60 diff --git a/include/encoding/apple2gs.tbl b/include/encoding/apple2gs.tbl new file mode 100644 index 00000000..edf0c332 --- /dev/null +++ b/include/encoding/apple2gs.tbl @@ -0,0 +1,12 @@ +NAME=APPLE-IIgs +EOT=00 + +A0=U+0020 +A1-BF=!"#$%&'()*+,-./0123456789:;<=>? +C0-DF=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +E0-FE=`abcdefghijklmnopqrstuvwxyz{\}~ + +{q}=A2 +{apos}=A7 +{lbrace}=FB +{rbrace}=FC diff --git a/include/encoding/ascii.tbl b/include/encoding/ascii.tbl new file mode 100644 index 00000000..0bbcf9a4 --- /dev/null +++ b/include/encoding/ascii.tbl @@ -0,0 +1,15 @@ +NAME=ASCII +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/atari.tbl b/include/encoding/atari.tbl new file mode 100644 index 00000000..016956b3 --- /dev/null +++ b/include/encoding/atari.tbl @@ -0,0 +1 @@ +ALIAS=atascii diff --git a/include/encoding/atariscr.tbl b/include/encoding/atariscr.tbl new file mode 100644 index 00000000..dfb58a4f --- /dev/null +++ b/include/encoding/atariscr.tbl @@ -0,0 +1 @@ +ALIAS=atasciiscr diff --git a/include/encoding/atascii.tbl b/include/encoding/atascii.tbl new file mode 100644 index 00000000..d2011372 --- /dev/null +++ b/include/encoding/atascii.tbl @@ -0,0 +1,23 @@ +NAME=ATASCII +EOT=00 + +00=♡ +10=♣ +12=– +14=• +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7c=♢abcdefghijklmnopqrstuvwxyz♠| + +{q}=22 +{apos}=27 +{n}=9b +{up}=1c +{down}=1d +{left}=1e +{right}=1f +{b}=7e +{t}=7f +♥=00 +·=14 diff --git a/include/encoding/atasciiscr.tbl b/include/encoding/atasciiscr.tbl new file mode 100644 index 00000000..74c9ad7c --- /dev/null +++ b/include/encoding/atasciiscr.tbl @@ -0,0 +1,19 @@ +NAME=ATASCII-Screen +EOT=DB + + +40=♡ +50=♣ +52=– +54=• +5c-5f=↑↓←→ +00=U+0020 +01-1f=!"#$%&'()*+,-./0123456789:;<=>? +20-3f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7c=♢abcdefghijklmnopqrstuvwxyz♠| + +{q}=02 +{apos}=07 +♥=40 +·=54 + diff --git a/include/encoding/bbc.tbl b/include/encoding/bbc.tbl new file mode 100644 index 00000000..6626fb66 --- /dev/null +++ b/include/encoding/bbc.tbl @@ -0,0 +1,19 @@ +NAME=BBC +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7f=£abcdefghijklmnopqrstuvwxyz{|}~© + +↑=5E + +{b}=08 +{t}=09 +{n}=0d +{q}=22 +{apos}=27 +{pound}=60 +{lbrace}=7b +{rbrace}=7d +{copy}=7f diff --git a/include/encoding/cbmscr.tbl b/include/encoding/cbmscr.tbl new file mode 100644 index 00000000..f01d7e89 --- /dev/null +++ b/include/encoding/cbmscr.tbl @@ -0,0 +1,21 @@ +NAME=CBM-Screen +EOT=E0 + +00-1f=@abcdefghijklmnopqrstuvwxyz[£]↑← +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5a=–ABCDEFGHIJKLMNOPQRSTUVWXYZ +5e=π + +{apos}=27 +{q}=22 +{pound}=1c +{pi}=5e + +^=1E +♥=53 +♡=53 +♠=41 +♣=58 +♢=5A +•=51 diff --git a/include/encoding/cbmscrjp.tbl b/include/encoding/cbmscrjp.tbl new file mode 100644 index 00000000..c65c0e94 --- /dev/null +++ b/include/encoding/cbmscrjp.tbl @@ -0,0 +1,56 @@ +NAME=CBM-Screen-JP +EOT=E0 + +00-1f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑← +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-4f=タチツテトナニヌネノハヒフヘホマ +50-5f=ミムメモヤユヨラリルレロワン゛゜ +61-63=円年月 +66=ヲ +70-7f=πアイウエオカキクケコサシスセソ + +{apos}=27 +{q}=22 +{yen}=1c +{pi}=70 + +\=1C +^=1E +♥=44 +♡=44 +♠=41 +♣=7B +♢=42 +•=5D +ー=2D +U+ff70=2D +U+ff66=66 +ヮ=5C +ヵ=76 +ヶ=79 +ァ=71 +U+ff67=71 +ィ=72 +U+ff68=72 +ゥ=73 +U+ff69=73 +ェ=74 +U+ff6a=74 +ォ=75 +U+ff6b=75 +ャ=54 +U+ff6c=54 +ュ=55 +U+ff6d=55 +ョ=56 +U+ff6e=56 +ッ=42 +U+ff6f=42 +a-z=01 +ア-ソ=71 +タ-ン=40 +; TODO: narrow katakana +゙=5E +゚=5F +KATAKANA=>DECOMPOSE diff --git a/include/encoding/cp1250.tbl b/include/encoding/cp1250.tbl new file mode 100644 index 00000000..e38a0af0 --- /dev/null +++ b/include/encoding/cp1250.tbl @@ -0,0 +1,32 @@ +NAME=CP1250 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80=€ +82=‚ +84-87=„…†‡ +89-8f=‰Š‹ŚŤŽŹ +91-97=‘’“”•–— +99-9f=™š›śťžź +a1-ac=ˇ˘Ł¤Ą¦§¨©Ş«¬ +ae-af=®Ż +b0-bf=°±˛ł´µ¶·¸ąş»Ľ˝ľż +c0-cf=ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎ +d0-df=ĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß +e0-ef=ŕáâăäĺćçčéęëěíîď +f0-ff=đńňóôőö÷řůúűüýţ˙ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/cp1251.tbl b/include/encoding/cp1251.tbl new file mode 100644 index 00000000..3bf77d00 --- /dev/null +++ b/include/encoding/cp1251.tbl @@ -0,0 +1,28 @@ +NAME=CP1251 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8f=ЂЃ‚ѓ„…†‡€‰Љ‹ЊЌЋЏ +90-97=ђ‘’“”•–— +99-9f=™љ›њќћџ +a1-ac=ЎўЈ¤Ґ¦§Ё©Є«¬ +ae-af=®Ї +b0-bf=°±Ііґµ¶·ё№є»јЅѕї +c0-cf=АБВГДЕЖЗИЙКЛМНОП +d0-df=РСТУФХЦЧШЩЪЫЬЭЮЯ +e0-ef=абвгдежзийклмноп +f0-ff=рстуфхцчшщъыьэюя + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{copy}=a9 +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/cp1252.tbl b/include/encoding/cp1252.tbl new file mode 100644 index 00000000..30484ec3 --- /dev/null +++ b/include/encoding/cp1252.tbl @@ -0,0 +1,34 @@ +NAME=CP1252 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80=€ +82-8c=‚ƒ„…†‡ˆ‰Š‹Œ +8e=Ž +91-9c=‘’“”•–—˜™š›œ +9e-9f=žŸ +a1-ac=¡¢£¤¥¦§¨©ª«¬ +ae-af=®¯ +b0-bf=°±²³´µ¶·¸¹º»¼½¾¿ +c0-cf=ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +d0-df=ÐÑÒÓÔÕÖרÙÚÛÜÝÞß +e0-ef=àáâãäåæçèéêëìíîï +f0-ff=ðñòóôõö÷øùúûüýþÿ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=a2 +{pound}=a3 +{yen}=a5 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/cp437.tbl b/include/encoding/cp437.tbl new file mode 100644 index 00000000..432abf7e --- /dev/null +++ b/include/encoding/cp437.tbl @@ -0,0 +1,30 @@ +NAME=CP437 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ÇüéâäàåçêëèïîìÄÅ +90-9F=ÉæÆôöòûùÿÖÜ¢£¥₧ƒ +A0-AF=áíóúñѪº¿⌐¬½¼¡«» +B0-BF=░▒▓│┤╡╢╖╕╣║╗╝╜╛┐ +C0-CF=└┴┬├─┼╞╟╚╔╩╦╠═╬╧ +D0-DF=╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ +E0-EF=αßΓπΣσµτΦΘΩδ∞φε∩ +F0-FE=≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ + +β=E1 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=9b +{pound}=9c +{yen}=9d +{ss}=e1 +{nbsp}=FF diff --git a/include/encoding/cp850.tbl b/include/encoding/cp850.tbl new file mode 100644 index 00000000..12493d40 --- /dev/null +++ b/include/encoding/cp850.tbl @@ -0,0 +1,31 @@ +NAME=CP850 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ÇüéâäàåçêëèïîìÄÅ +90-9F=ÉæÆôöòûùÿÖÜø£Ø×ƒ +A0-AF=áíóúñѪº¿®¬½¼¡«» +B0-BF=░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐ +C0-CF=└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ +D0-DF=ðÐÊËÈıÍÎÙ┘┌█▄¦Ì▀ +E0-EF=ÓßÔÒõÕµþÞÚÛÙýݯ´ +F1-FE=±‗¾¶§÷¸°¨·¹³²■ + +β=E1 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=BD +{pound}=9c +{yen}=BE +{ss}=e1 +{nbsp}=FF +{shy}=F1 diff --git a/include/encoding/cp851.tbl b/include/encoding/cp851.tbl new file mode 100644 index 00000000..e54ccd92 --- /dev/null +++ b/include/encoding/cp851.tbl @@ -0,0 +1,30 @@ +NAME=CP851 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ÇüéâäàΆçêëèïîΈÄΉ +90=Ί +92-9F=ΌôöΎûùΏÖÜά£έήί +A0-AF=ϊΐόύΑΒΓΔΕΖΗ½ΘΙ«» +B0-BF=░▒▓│┤ΚΛΜΝ╣║╗╝ΞΟ┐ +C0-CF=└┴┬├─┼ΠΡ╚╔╩╦╠═╬Σ +D0-DF=ΤΥΦΧΨΩαβγ┘┌█▄δε▀ +E0-EF=ζηθικλμνξοπρσςτ΄ +F1-FE=±υφχ§ψ¸°¨ωϋΰώ■ + +ß=D7 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=D7 +{nbsp}=FF +{shy}=F0 +{pi}=EA diff --git a/include/encoding/cp852.tbl b/include/encoding/cp852.tbl new file mode 100644 index 00000000..0b0f8581 --- /dev/null +++ b/include/encoding/cp852.tbl @@ -0,0 +1,28 @@ +NAME=CP852 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ÇüéâäůćçłëŐőîŹÄĆ +90-9F=ÉĹĺôöĽľŚśÖÜŤťŁ×č +A0-AF=áíóúĄąŽžĘ꬟Ⱥ«» +B0-BF=░▒▓│┤ÁÂĚŞ╣║╗╝Żż┐ +C0-CF=└┴┬├─┼Ăă╚╔╩╦╠═╬¤ +D0-DF=đĐĎËďŇÍÎě┘┌█▄ŢŮ▀ +E0-EF=ÓßÔŃńňŠšŔÚŕŰýÝţ´ +F1-FE=˝˛ˇ˘§÷¸°¨˙űŘř■ + +β=E1 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=e1 +{nbsp}=FF +{shy}=F0 diff --git a/include/encoding/cp855.tbl b/include/encoding/cp855.tbl new file mode 100644 index 00000000..822362d3 --- /dev/null +++ b/include/encoding/cp855.tbl @@ -0,0 +1,25 @@ +NAME=CP855 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ђЂѓЃёЁєЄѕЅіІїЇјЈ +90-9F=љЉњЊћЋќЌўЎџЏюЮъЪ +A0-AF=аАбБцЦдДеЕфФгГ«» +B0-BF=░▒▓│┤хХиИ╣║╗╝йЙ┐ +C0-CF=└┴┬├─┼кК╚╔╩╦╠═╬¤ +D0-DF=лЛмМнНоОп┘┌█▄Пя▀ +E0-EF=ЯрРсСтТуУжЖвВьЬ№ +F1-FE=ыЫзЗшШэЭщЩчЧ§■ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{nbsp}=FF +{shy}=F0 diff --git a/include/encoding/cp858.tbl b/include/encoding/cp858.tbl new file mode 100644 index 00000000..f1051e47 --- /dev/null +++ b/include/encoding/cp858.tbl @@ -0,0 +1,32 @@ +NAME=CP858 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ÇüéâäàåçêëèïîìÄÅ +90-9F=ÉæÆôöòûùÿÖÜø£Ø×ƒ +A0-AF=áíóúñÑªº¿®¬½¼¡«» +B0-BF=░▒▓│┤ÁÂÀ©╣║╗╝¢¥┐ +C0-CF=└┴┬├─┼ãÃ╚╔╩╦╠═╬¤ +D0-DF=ðÐÊËÈ€ÍÎÏ┘┌█▄¦Ì▀ +E0-EF=ÓßÔÒõÕµþÞÚÛÙýÝ¯´ +F1-FE=±‗¾¶§÷¸°¨·¹³²■ + +β=E1 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=BD +{pound}=9c +{yen}=BE +{euro}=D5 +{ss}=e1 +{nbsp}=FF +{shy}=F0 diff --git a/include/encoding/cp866.tbl b/include/encoding/cp866.tbl new file mode 100644 index 00000000..ed080dc0 --- /dev/null +++ b/include/encoding/cp866.tbl @@ -0,0 +1,24 @@ +NAME=CP866 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>?
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=АБВГДЕЖЗИЙКЛМНОП +90-9F=РСТУФХЦЧШЩЪЫЬЭЮЯ +A0-AF=абвгдежзийклмноп +B0-BF=░▒▓│┤╡╢╖╕╣║╗╝╜╛┐ +C0-CF=└┴┬├─┼╞╟╚╔╩╦╠═╬╧ +D0-DF=╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ +E0-EF=рстуфхцчшщъыьэюя +F0-FE=ЁёЄєЇїЎў°∙·√№¤■ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{nbsp}=FF diff --git a/include/encoding/cpc_da.tbl b/include/encoding/cpc_da.tbl new file mode 100644 index 00000000..c9b5f10e --- /dev/null +++ b/include/encoding/cpc_da.tbl @@ -0,0 +1,27 @@ +NAME=CPC-DA +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ↑_ +60-7e=`abcdefghijklmnopqrstuvwxyzæøå~ +a0-af=^ʹʺ£©¶§‘¼½¾±÷¬¿¡ +b0-bf=αβγδεθλμπσφψχωΣΩ +e2-e5=♣♢♡♠ +fe-ff=↕↔ + +’=27 +∈=b4 +♥=e4 + +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{AE}=5b +{OE}=5c +{AA}=5d +{ae}=7b +{oe}=7c +{aa}=7d diff --git a/include/encoding/cpc_en.tbl b/include/encoding/cpc_en.tbl new file mode 100644 index 00000000..809eda7b --- /dev/null +++ b/include/encoding/cpc_en.tbl @@ -0,0 +1,21 @@ +NAME=CPC-EN +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]↑_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a0-af=^ʹʺ£©¶§‘¼½¾±÷¬¿¡ +b0-bf=αβγδεθλμπσφψχωΣΩ +e2-e5=♣♢♡♠ +fe-ff=↕↔ + +’=27 +∈=b4 +♥=e4 + +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/cpc_es.tbl b/include/encoding/cpc_es.tbl new file mode 100644 index 00000000..c161176c --- /dev/null +++ b/include/encoding/cpc_es.tbl @@ -0,0 +1,21 @@ +NAME=CPC-ES +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]↑_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a0-af=^Ñʺ₧©¶§‘¼½¾ñ÷¬¿¡ +b0-bf=αβγδεθλμπσφψχωΣΩ +e2-e5=♣♢♡♠ +fe-ff=↕↔ + +’=27 +∈=b4 +♥=e4 + +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/cpc_fr.tbl b/include/encoding/cpc_fr.tbl new file mode 100644 index 00000000..c1c4012c --- /dev/null +++ b/include/encoding/cpc_fr.tbl @@ -0,0 +1,22 @@ +NAME=CPC-FR +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5d=àABCDEFGHIJKLMNOPQRSTUVWXYZ[ç] +5f=_ +60-7e=`abcdefghijklmnopqrstuvwxyzéùè~ +a0-af=^ʹ°£©¶§‘¼½¾±÷¬¿¡ +b0-bf=αβγδεθλμπσφψχωΣΩ +e2-e5=♣♢♡♠ +fe-ff=↕↔ + +’=27 +∈=b4 +♥=e4 + +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/ebcdic.tbl b/include/encoding/ebcdic.tbl new file mode 100644 index 00000000..187a66a8 --- /dev/null +++ b/include/encoding/ebcdic.tbl @@ -0,0 +1,26 @@ +NAME=EBCDIC +EOT=00 + +40=U+0020 +4A-50=¢.<(+|& +5A-61=!$*);¬-/ +6A-6F=¦,%_>? +79-7F=`:#@'=" +81-89=abcdefghi +91-99=jklmnopqr +A1-A9=~stuvwxyz +8F=± +B0=^ +BA-BB=[] +C0-C9={ABCDEFGHI +D0-D9=}JKLMNOPQR +E1=\ +E2-E9=STUVWXYZ +F0-F9=0123456789 + +{n}=15 +{t}=05 +{apos}=7D +{q}=7F +{lbrace}=C0 +{rbrace}=D0 diff --git a/include/encoding/galaksija.tbl b/include/encoding/galaksija.tbl new file mode 100644 index 00000000..22d3a801 --- /dev/null +++ b/include/encoding/galaksija.tbl @@ -0,0 +1,16 @@ +NAME=Galaksija +EOT=00 + +20=U+0020 +21-26=!"#$%& +28-3f=()*+,-./0123456789:;<=>? 
+41-5f=ABCDEFGHIJKLMNOPQRSTUVWXYZČĆŽŠ_ + +a-z=41 +č=5b +ć=5c +ž=5d +š=5e +{n}=0d +{q}=22 +{galaksija}=4027 diff --git a/include/encoding/iso1.tbl b/include/encoding/iso1.tbl new file mode 100644 index 00000000..b4b8fc79 --- /dev/null +++ b/include/encoding/iso1.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_1 diff --git a/include/encoding/iso10.tbl b/include/encoding/iso10.tbl new file mode 100644 index 00000000..42fe20ca --- /dev/null +++ b/include/encoding/iso10.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_10 diff --git a/include/encoding/iso13.tbl b/include/encoding/iso13.tbl new file mode 100644 index 00000000..f654f064 --- /dev/null +++ b/include/encoding/iso13.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_13 diff --git a/include/encoding/iso14.tbl b/include/encoding/iso14.tbl new file mode 100644 index 00000000..efe10fd8 --- /dev/null +++ b/include/encoding/iso14.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_14 diff --git a/include/encoding/iso15.tbl b/include/encoding/iso15.tbl new file mode 100644 index 00000000..fbf63abd --- /dev/null +++ b/include/encoding/iso15.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_15 diff --git a/include/encoding/iso16.tbl b/include/encoding/iso16.tbl new file mode 100644 index 00000000..056f679d --- /dev/null +++ b/include/encoding/iso16.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_16 diff --git a/include/encoding/iso2.tbl b/include/encoding/iso2.tbl new file mode 100644 index 00000000..a9869e97 --- /dev/null +++ b/include/encoding/iso2.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_2 diff --git a/include/encoding/iso3.tbl b/include/encoding/iso3.tbl new file mode 100644 index 00000000..e6ccb3a6 --- /dev/null +++ b/include/encoding/iso3.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_3 diff --git a/include/encoding/iso4.tbl b/include/encoding/iso4.tbl new file mode 100644 index 00000000..4135c3d6 --- /dev/null +++ b/include/encoding/iso4.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_4 diff --git a/include/encoding/iso5.tbl b/include/encoding/iso5.tbl new file mode 100644 index 00000000..5068d3fc --- /dev/null +++ b/include/encoding/iso5.tbl @@ 
-0,0 +1 @@ +ALIAS=iso8859_5 diff --git a/include/encoding/iso7.tbl b/include/encoding/iso7.tbl new file mode 100644 index 00000000..402c8a82 --- /dev/null +++ b/include/encoding/iso7.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_7 diff --git a/include/encoding/iso8859_1.tbl b/include/encoding/iso8859_1.tbl new file mode 100644 index 00000000..475a33d2 --- /dev/null +++ b/include/encoding/iso8859_1.tbl @@ -0,0 +1,29 @@ +NAME=ISO 8859-1 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=¡¢£¤¥¦§¨©ª«¬ +ae-af=®¯ +b0-bf=°±²³´µ¶·¸¹º»¼½¾¿ +c0-cf=ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +d0-df=ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß +e0-ef=àáâãäåæçèéêëìíîï +f0-ff=ðñòóôõö÷øùúûüýþÿ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=a2 +{pound}=a3 +{yen}=a5 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_10.tbl b/include/encoding/iso8859_10.tbl new file mode 100644 index 00000000..42832eba --- /dev/null +++ b/include/encoding/iso8859_10.tbl @@ -0,0 +1,27 @@ +NAME=ISO 8859-10 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=ĄĒĢĪĨĶ§ĻĐŠŦŽ +ae-af=ŪŊ +b0-bf=°ąēģīĩķ·ļđšŧž―ūŋ +c0-cf=ĀÁÂÃÄÅÆĮČÉĘËĖÍÎÏ +d0-df=ÐŅŌÓÔÕÖŨØŲÚÛÜÝÞß +e0-ef=āáâãäåæįčéęëėíîï +f0-ff=ðņōóôõöũøųúûüýþĸ + + + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_13.tbl b/include/encoding/iso8859_13.tbl new file mode 100644 index 00000000..7faed69d --- /dev/null +++ b/include/encoding/iso8859_13.tbl @@ -0,0 +1,30 @@ +NAME=ISO 8859-13 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>?
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=”¢£¤„¦§Ø©Ŗ«¬ +ae-af=®Æ +b0-bf=°±²³“µ¶·ø¹ŗ»¼½¾æ +c0-cf=ĄĮĀĆÄÅĘĒČÉŹĖĢĶĪĻ +d0-df=ŠŃŅÓŌÕÖ×ŲŁŚŪÜŻŽß +e0-ef=ąįāćäåęēčéźėģķīļ +f0-ff=šńņóōõö÷ųłśūüżž’ + + + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=a2 +{pound}=a3 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_14.tbl b/include/encoding/iso8859_14.tbl new file mode 100644 index 00000000..b6d18c5f --- /dev/null +++ b/include/encoding/iso8859_14.tbl @@ -0,0 +1,29 @@ +NAME=ISO 8859-14 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=Ḃḃ£ĊċḊ§Ẁ©ẂḋỲ +ae-af=®Ÿ +b0-bf=ḞḟĠġṀṁ¶ṖẁṗẃṠỳẄẅṡ +c0-cf=ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +d0-df=ŴÑÒÓÔÕÖṪØÙÚÛÜÝŶß +e0-ef=àáâãäåæçèéêëìíîï +f0-ff=ŵñòóôõöṫøùúûüýŷÿ + + + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{pound}=a3 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_15.tbl b/include/encoding/iso8859_15.tbl new file mode 100644 index 00000000..af24b9f7 --- /dev/null +++ b/include/encoding/iso8859_15.tbl @@ -0,0 +1,30 @@ +NAME=ISO 8859-15 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=¡¢£€¥Š§š©ª«¬ +ae-af=®¯ +b0-bf=°±²³Žµ¶·ž¹º»ŒœŸ¿ +c0-cf=ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +d0-df=ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß +e0-ef=àáâãäåæçèéêëìíîï +f0-ff=ðñòóôõö÷øùúûüýþÿ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=a2 +{pound}=a3 +{euro}=a4 +{yen}=a5 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_16.tbl b/include/encoding/iso8859_16.tbl new file mode 100644 index 00000000..dbe5fa84 --- /dev/null +++ b/include/encoding/iso8859_16.tbl @@ -0,0 +1,29 @@ +NAME=ISO 8859-16 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>?
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=ĄąŁ€„Š§š©Ș«Ź +ae-af=źŻ +b0-bf=°±ČłŽ”¶·žčș»ŒœŸż +c0-cf=ÀÁÂĂÄĆÆÇÈÉÊËÌÍÎÏ +d0-df=ĐŃÒÓÔŐÖŚŰÙÚÛÜĘȚß +e0-ef=àáâăäćæçèéêëìíîï +f0-ff=đńòóôőöśűùúûüęțÿ + + + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{euro}=a4 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_2.tbl b/include/encoding/iso8859_2.tbl new file mode 100644 index 00000000..cef7d8e6 --- /dev/null +++ b/include/encoding/iso8859_2.tbl @@ -0,0 +1,25 @@ +NAME=ISO 8859-2 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=Ą˘Ł¤ĽŚ§¨ŠŞŤŹ +ae-af=ŽŻ +b0-bf=°ą˛ł´ľśˇ¸šşťź˝žż +c0-cf=ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎ +d0-df=ĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß +e0-ef=ŕáâăäĺćçčéęëěíîď +f0-ff=đńňóôőö÷řůúűüýţ˙ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_3.tbl b/include/encoding/iso8859_3.tbl new file mode 100644 index 00000000..3d109f69 --- /dev/null +++ b/include/encoding/iso8859_3.tbl @@ -0,0 +1,29 @@ +NAME=ISO 8859-3 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-a4=Ħ˘£¤ +a6-ac=Ĥ§¨İŞĞĴ +af=Ż +b0-bd=°ħ²³´µĥ·¸ışğĵ½ +bf=ż +c0-c2=ÀÁÂ +c4-cf=ÄĊĈÇÈÉÊËÌÍÎÏ +d1-df=ÑÒÓÔĠÖ×ĜÙÚÛÜŬŜß +e0-e2=àáâ +e4-ef=äċĉçèéêëìíîï +f1-ff=ñòóôġö÷ĝùúûüŭŝ˙ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_4.tbl b/include/encoding/iso8859_4.tbl new file mode 100644 index 00000000..9783654b --- /dev/null +++ b/include/encoding/iso8859_4.tbl @@ -0,0 +1,25 @@ +NAME=ISO 8859-4 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>?
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=ĄĸŖ¤ĨĻ§¨ŠĒĢŦ +ae-af=Ž¯ +b0-bf=°ą˛ŗ´ĩļˇ¸šēģŧŊžŋ +c0-cf=ĀÁÂÃÄÅÆĮČÉĘËĖÍÎĪ +d0-df=ĐŅŌĶÔÕÖ×ØŲÚÛÜŨŪß +e0-ef=āáâãäåæįčéęëėíîī +f0-ff=đņōķôõö÷øųúûüũū˙ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_5.tbl b/include/encoding/iso8859_5.tbl new file mode 100644 index 00000000..f9bb767b --- /dev/null +++ b/include/encoding/iso8859_5.tbl @@ -0,0 +1,24 @@ +NAME=ISO 8859-5 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=ЁЂЃЄЅІЇЈЉЊЋЌ +ae-af=ЎЏ +b0-bf=АБВГДЕЖЗИЙКЛМНОП +c0-cf=РСТУФХЦЧШЩЪЫЬЭЮЯ +d0-df=абвгдежзийклмноп +e0-ef=рстуфхцчшщъыьэюя +f0-ff=№ёђѓєѕіїјљњћќ§ўџ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_7.tbl b/include/encoding/iso8859_7.tbl new file mode 100644 index 00000000..960cf714 --- /dev/null +++ b/include/encoding/iso8859_7.tbl @@ -0,0 +1,31 @@ +NAME=ISO 8859-7 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=‘’£€₯¦§¨©ͺ«¬ +af=― +b0-bf=°±²³΄΅Ά·ΈΉΊ»Ό½ΎΏ +c0-cf=ΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟ +d0-d1=ΠΡ +d3-df=ΣΤΥΦΧΨΩΪΫάέήί +e0-ef=ΰαβγδεζηθικλμνξο +f0-fe=πρςστυφχψωϊϋόύώ + + + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{pound}=a3 +{euro}=a4 +{copy}=a9 +{pi}=f0 +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso8859_9.tbl b/include/encoding/iso8859_9.tbl new file mode 100644 index 00000000..9cec6976 --- /dev/null +++ b/include/encoding/iso8859_9.tbl @@ -0,0 +1,31 @@ +NAME=ISO 8859-9 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>?
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-ac=¡¢£¤¥¦§¨©ª«¬ +ae-af=®¯ +b0-bf=°±²³´µ¶·¸¹º»¼½¾¿ +c0-cf=ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ +d0-df=ĞÑÒÓÔÕÖ×ØÙÚÛÜİŞß +e0-ef=àáâãäåæçèéêëìíîï +f0-ff=ğñòóôõö÷øùúûüışÿ + + + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=a2 +{pound}=a3 +{yen}=a5 +{copy}=a9 +{ss}=df +{nbsp}=A0 +{shy}=AD diff --git a/include/encoding/iso9.tbl b/include/encoding/iso9.tbl new file mode 100644 index 00000000..edd502cd --- /dev/null +++ b/include/encoding/iso9.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_9 diff --git a/include/encoding/iso_de.tbl b/include/encoding/iso_de.tbl new file mode 100644 index 00000000..8c382698 --- /dev/null +++ b/include/encoding/iso_de.tbl @@ -0,0 +1,22 @@ +NAME=ISO-IEC-646-DE +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_ +60-7e=`abcdefghijklmnopqrstuvwxyzäöüß + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{AE}=5b +{OE}=5c +{UE}=5d +{ae}=7b +{oe}=7c +{ue}=7d +{ss}=7e +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/iso_dk.tbl b/include/encoding/iso_dk.tbl new file mode 100644 index 00000000..f60d1ed7 --- /dev/null +++ b/include/encoding/iso_dk.tbl @@ -0,0 +1 @@ +ALIAS=iso_no diff --git a/include/encoding/iso_fi.tbl b/include/encoding/iso_fi.tbl new file mode 100644 index 00000000..670a138d --- /dev/null +++ b/include/encoding/iso_fi.tbl @@ -0,0 +1 @@ +ALIAS=iso_se diff --git a/include/encoding/iso_no.tbl b/include/encoding/iso_no.tbl new file mode 100644 index 00000000..2665c680 --- /dev/null +++ b/include/encoding/iso_no.tbl @@ -0,0 +1,33 @@ +NAME=ISO-IEC-646-NO +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>?
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_ +60-7e=`abcdefghijklmnopqrstuvwxyzæøå~ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{AE}=5b +{OE}=5c +{AA}=5d +{ae}=7b +{oe}=7c +{aa}=7d +{lbrace}=7b +{rbrace}=7d +¯=7e +‾=7e +|=7e +Ä=40 +ä=60 +Ü=5e +ü=7e +¤=24 +«=22 +»=22 +§=23 + diff --git a/include/encoding/iso_se.tbl b/include/encoding/iso_se.tbl new file mode 100644 index 00000000..92f24e95 --- /dev/null +++ b/include/encoding/iso_se.tbl @@ -0,0 +1,29 @@ +NAME=ISO-IEC-646-SE +EOT=00 + +20=U+0020 +21-3f=!"#¤%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_ +60-7e=`abcdefghijklmnopqrstuvwxyzäöå~ + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{AE}=5b +{OE}=5c +{AA}=5d +{ae}=7b +{oe}=7c +{aa}=7d +{lbrace}=7b +{rbrace}=7d +¯=7e +‾=7e +É=40 +é=60 +Ü=5e +ü=7e +$=24 + diff --git a/include/encoding/iso_yu.tbl b/include/encoding/iso_yu.tbl new file mode 100644 index 00000000..4d1a3d2d --- /dev/null +++ b/include/encoding/iso_yu.tbl @@ -0,0 +1,22 @@ +NAME=ISO-IEC-646-YU +EOT=00 + +20=U+0020 +21-3f=!"#$%^'()*+,-./0123456789:;<=>? +40-5f=ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_ +60-7e=žabcdefghijklmnopqrstuvwxyzšđćč + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +Ë=24 +ë=5f +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d diff --git a/include/encoding/jis.tbl b/include/encoding/jis.tbl new file mode 100644 index 00000000..65ac3283 --- /dev/null +++ b/include/encoding/jis.tbl @@ -0,0 +1,49 @@ +NAME=JIS-X-0201 +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a1-af=。「」、・ヲァィゥェォャュョッ +b0-bf=ーアイウエオカキクケコサシスセソ +c0-cf=タチツテトナニヌネノハヒフヘホマ +d0-df=ミムメモヤユヨラリルレロワン゛゜ +e8-eb=♠♡♢♣ +f0-f6=円年月日時分秒 +fa=\ + +¦=7C +¯=7E +‾=7E +♥=E9 + +ア-ン=B1 + +U+FF61=A1 +U+FF62=A2 +U+FF63=A3 +U+FF64=A4 +U+FF65=A5 +U+FF66=A6 +U+FF67=A7 +U+FF68=A8 +U+FF69=A9 +U+FF6A=AA +U+FF6B=AB +U+FF6C=AC +U+FF6D=AD +U+FF6E=AE +U+FF6F=AF +U+FF70=B0 +U+FF9E=DE +U+FF9F=DF + +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{yen}=5c + +KATAKANA=>DECOMPOSE diff --git a/include/encoding/jisx.tbl b/include/encoding/jisx.tbl new file mode 100644 index 00000000..01e2f37b --- /dev/null +++ b/include/encoding/jisx.tbl @@ -0,0 +1 @@ +ALIAS=jis diff --git a/include/encoding/kamenicky.tbl b/include/encoding/kamenicky.tbl new file mode 100644 index 00000000..bf94ff56 --- /dev/null +++ b/include/encoding/kamenicky.tbl @@ -0,0 +1,27 @@ +NAME=Kamenicky +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ČüéďäĎŤčěĚĹÍľĺÄÁ +90-9F=ÉžŽôöÓůÚýÖÜŠĽÝŘť +A0-AF=áíóúňŇŮÔšřŕŔ¼§«» +B0-BF=░▒▓│┤╡╢╖╕╣║╗╝╜╛┐ +C0-CF=└┴┬├─┼╞╟╚╔╩╦╠═╬╧ +D0-DF=╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ +E0-EF=αßΓπΣσµτΦΘΩδ∞φε∩ +F0-FE=≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ + +β=E1 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{ss}=e1 +{nbsp}=FF diff --git a/include/encoding/koi7n2.tbl b/include/encoding/koi7n2.tbl new file mode 100644 index 00000000..78cc46f7 --- /dev/null +++ b/include/encoding/koi7n2.tbl @@ -0,0 +1,19 @@ +NAME=KOI-7 N2 +EOT=00 + +20=U+0020 +21-3f=!"#¤%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧ + +{b}=08 +{t}=09 +{n}=0d +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +$=24 +↑=5E +a-z=41 +ю-ч=60 diff --git a/include/encoding/latin0.tbl b/include/encoding/latin0.tbl new file mode 100644 index 00000000..fbf63abd --- /dev/null +++ b/include/encoding/latin0.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_15 diff --git a/include/encoding/latin1.tbl b/include/encoding/latin1.tbl new file mode 100644 index 00000000..b4b8fc79 --- /dev/null +++ b/include/encoding/latin1.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_1 diff --git a/include/encoding/latin10.tbl b/include/encoding/latin10.tbl new file mode 100644 index 00000000..5d7af47a --- /dev/null +++ b/include/encoding/latin10.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_16 + diff --git a/include/encoding/latin2.tbl b/include/encoding/latin2.tbl new file mode 100644 index 00000000..3bed26bc --- /dev/null +++ b/include/encoding/latin2.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_2 + diff --git a/include/encoding/latin3.tbl b/include/encoding/latin3.tbl new file mode 100644 index 00000000..488bcdbc --- /dev/null +++ b/include/encoding/latin3.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_3 + diff --git a/include/encoding/latin4.tbl b/include/encoding/latin4.tbl new file mode 100644 index 00000000..5fea46c9 --- /dev/null +++ b/include/encoding/latin4.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_4 + diff --git a/include/encoding/latin5.tbl b/include/encoding/latin5.tbl new file mode 100644 index 00000000..0f8b4911 --- /dev/null +++ b/include/encoding/latin5.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_9 + diff --git a/include/encoding/latin6.tbl b/include/encoding/latin6.tbl new file mode 100644 index 00000000..11f77dfa --- /dev/null +++ b/include/encoding/latin6.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_10 + diff --git a/include/encoding/latin7.tbl b/include/encoding/latin7.tbl new file mode 100644 index 00000000..909c8222 --- /dev/null +++ b/include/encoding/latin7.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_13 + diff --git 
a/include/encoding/latin8.tbl b/include/encoding/latin8.tbl new file mode 100644 index 00000000..6151b943 --- /dev/null +++ b/include/encoding/latin8.tbl @@ -0,0 +1,2 @@ +ALIAS=iso8859_14 + diff --git a/include/encoding/latin9.tbl b/include/encoding/latin9.tbl new file mode 100644 index 00000000..fbf63abd --- /dev/null +++ b/include/encoding/latin9.tbl @@ -0,0 +1 @@ +ALIAS=iso8859_15 diff --git a/include/encoding/mazovia.tbl b/include/encoding/mazovia.tbl new file mode 100644 index 00000000..eac2430b --- /dev/null +++ b/include/encoding/mazovia.tbl @@ -0,0 +1,29 @@ +NAME=Mazovia +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8F=ÇüéâäàąçêëèïîćÄĄ +90-9F=ĘęłôöĆûùŚÖܢ٥śƒ +A0-AF=ŹŻóÓńŃźż¿⌐¬½¼¡«» +B0-BF=░▒▓│┤╡╢╖╕╣║╗╝╜╛┐ +C0-CF=└┴┬├─┼╞╟╚╔╩╦╠═╬╧ +D0-DF=╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ +E0-EF=αßΓπΣσµτΦΘΩδ∞φε∩ +F0-FE=≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ + +β=E1 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{cent}=9b +{yen}=9d +{ss}=e1 +{nbsp}=FF diff --git a/include/encoding/msx_br.tbl b/include/encoding/msx_br.tbl new file mode 100644 index 00000000..11fc38e8 --- /dev/null +++ b/include/encoding/msx_br.tbl @@ -0,0 +1,50 @@ +NAME=MSX-BR +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-85=ÇüéâÁà +87-8f=çêÍÓÚÂÊÔÀ +90-9f=ÉæÆôöòûùÿÖÜ¢£¥₧ƒ +a0-af=áíóúñѪº¿⌐¬½¼¡«» +b0-ba=ÃãĨĩÕõŨũIJij¾ +bd-bf=‰¶§ +d8=Δ +da=ω +e0-ef=αβΓΠΣσµγΦθΩδ∞∅∈∩ +f0-f3=≡±≥≤ +f6=÷ +fc-fd=ⁿ² + +ß=E1 +¦=7C +Ő=B4 +ő=B5 +Ű=B6 +ű=B7 + +♥=0143 +♡=0143 +♢=0144 +♢=0144 +♣=0145 +♠=0146 +·=0147 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{right}=1c +{left}=1d +{up}=1e +{down}=1f +{cent}=9b +{pound}=9c +{yen}=9d + diff --git a/include/encoding/msx_de.tbl b/include/encoding/msx_de.tbl new file mode 100644 index 00000000..03961868 --- /dev/null +++ b/include/encoding/msx_de.tbl @@ -0,0 +1 @@ +ALIAS=msx_intl diff --git a/include/encoding/msx_es.tbl b/include/encoding/msx_es.tbl new file mode 100644 index 00000000..03961868 --- /dev/null +++ b/include/encoding/msx_es.tbl @@ -0,0 +1 @@ +ALIAS=msx_intl diff --git a/include/encoding/msx_fr.tbl b/include/encoding/msx_fr.tbl new file mode 100644 index 00000000..03961868 --- /dev/null +++ b/include/encoding/msx_fr.tbl @@ -0,0 +1 @@ +ALIAS=msx_intl diff --git a/include/encoding/msx_intl.tbl b/include/encoding/msx_intl.tbl new file mode 100644 index 00000000..55c46469 --- /dev/null +++ b/include/encoding/msx_intl.tbl @@ -0,0 +1,49 @@ +NAME=MSX-International +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8f=ÇüéâäàåçêëèïîìÄÅ +90-9f=ÉæÆôöòûùÿÖÜ¢£¥₧ƒ +a0-af=áíóúñѪº¿⌐¬½¼¡«» +b0-ba=ÃãĨĩÕõŨũIJij¾ +bd-bf=‰¶§ +d8=Δ +da=ω +e0-ef=αβΓΠΣσµγΦθΩδ∞∅∈∩ +f0-f3=≡±≥≤ +f6=÷ +fc-fd=ⁿ² + +ß=E1 +¦=7C +Ő=B4 +ő=B5 +Ű=B6 +ű=B7 + +♥=0143 +♡=0143 +♢=0144 +♢=0144 +♣=0145 +♠=0146 +·=0147 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{right}=1c +{left}=1d +{up}=1e +{down}=1f +{cent}=9b +{pound}=9c +{yen}=9d + diff --git a/include/encoding/msx_jp.tbl b/include/encoding/msx_jp.tbl new file mode 100644 index 00000000..cb522c00 --- /dev/null +++ b/include/encoding/msx_jp.tbl @@ -0,0 +1,77 @@ +NAME=MSX-JP +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-83=♠♡♣♢ +85-8f=·をぁぃぅぇぉゃゅょっ +90=U+3000 +91-9f=あいうえおかきくけこさしすせそ +a1-af=。「」、・ヲァィゥェォャュョッ +b0-bf=ーアイウエオカキクケコサシスセソ +c0-cf=タチツテトナニヌネノハヒフヘホマ +d0-df=ミムメモヤユヨラリルレロワン゛゜ +e0-ef=たちつてとなにぬねのはひふへほま +f0-fd=みむめもやゆよらりるれろわん + +¦=7C +♥=81 + +ア-ン=B1 + +U+FF61=A1 +U+FF62=A2 +U+FF63=A3 +U+FF64=A4 +U+FF65=A5 +U+FF66=A6 +U+FF67=A7 +U+FF68=A8 +U+FF69=A9 +U+FF6A=AA +U+FF6B=AB +U+FF6C=AC +U+FF6D=AD +U+FF6E=AE +U+FF6F=AF +U+FF70=B0 +U+FF9E=DE +U+FF9F=DF +月=0141 +火=0142 +水=0143 +木=0144 +金=0145 +土=0146 +日=0147 +年=0148 +円=0149 +時=014A +分=014B +秒=014C +百=014D +千=014E +万=014F +大=015D +中=015E +小=015F +π=0150 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{right}=1c +{left}=1d +{up}=1e +{down}=1f +{yen}=5c +{pi}=0150 + +KATAKANA=>DECOMPOSE +HIRAGANA=>DECOMPOSE diff --git a/include/encoding/msx_ru.tbl b/include/encoding/msx_ru.tbl new file mode 100644 index 00000000..952b1781 --- /dev/null +++ b/include/encoding/msx_ru.tbl @@ -0,0 +1,41 @@ +NAME=MSX-RU +EOT=00 + + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +a0-af=αβΓΠΣσµγΦθΩδ∞∅∈∩ +b0-b3=≡±≥≤ +b6=÷ +bc-bd=ⁿ² +bf=¤ +98=Δ +9a=ω +c0-df=юабцдефгхийклмнопярстужвьызшэщчъ +e0-fe=ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧ + +ß=A1 +¦=7C + +♥=0143 +♡=0143 +♢=0144 +♢=0144 +♣=0145 +♠=0146 +·=0147 + +{b}=08 +{t}=09 +{n}=0d0a +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{right}=1c +{left}=1d +{up}=1e +{down}=1f + diff --git a/include/encoding/msx_uk.tbl b/include/encoding/msx_uk.tbl new file mode 100644 index 00000000..03961868 --- /dev/null +++ b/include/encoding/msx_uk.tbl @@ -0,0 +1 @@ +ALIAS=msx_intl diff --git a/include/encoding/msx_us.tbl b/include/encoding/msx_us.tbl new file mode 100644 index 00000000..03961868 --- /dev/null +++ b/include/encoding/msx_us.tbl @@ -0,0 +1 @@ +ALIAS=msx_intl diff --git a/include/encoding/oldpet.tbl b/include/encoding/oldpet.tbl new file mode 100644 index 00000000..b0556f23 --- /dev/null +++ b/include/encoding/oldpet.tbl @@ -0,0 +1 @@ +ALIAS=oldpetscii diff --git a/include/encoding/oldpetscii.tbl b/include/encoding/oldpetscii.tbl new file mode 100644 index 00000000..f1c21c30 --- /dev/null +++ b/include/encoding/oldpetscii.tbl @@ -0,0 +1,26 @@ +NAME=Old PETSCII +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@abcdefghijklmnopqrstuvwxyz[\]↑← +c0-da=–ABCDEFGHIJKLMNOPQRSTUVWXYZ +de=π + +{n}=0d +{apos}=27 +{q}=22 +{pi}=de +{up}=91 +{down}=11 +{left}=9d +{right}=1d +{reverse}=12 +{reverseoff}=92 +^=5E +♥=D3 +♡=D3 +♠=C1 +♣=D8 +♢=DA +•=D1 diff --git a/include/encoding/origpet.tbl b/include/encoding/origpet.tbl new file mode 100644 index 00000000..4f471675 --- /dev/null +++ b/include/encoding/origpet.tbl @@ -0,0 +1 @@ +ALIAS=origpetscii \ No newline at end of file diff --git a/include/encoding/origpetscii.tbl b/include/encoding/origpetscii.tbl new file mode 100644 index 00000000..2987188a --- /dev/null +++ b/include/encoding/origpetscii.tbl @@ -0,0 +1,26 @@ +NAME=Original PETSCII +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]↑← +c0-da=–abcdefghijklmnopqrstuvwxyz +de=π + +{n}=0d +{apos}=27 +{q}=22 +{pi}=de +{up}=91 +{down}=11 +{left}=9d +{right}=1d +{reverse}=12 +{reverseoff}=92 +^=5E +♥=D3 +♡=D3 +♠=C1 +♣=D8 +♢=DA +•=D1 diff --git a/include/encoding/pcw.tbl b/include/encoding/pcw.tbl new file mode 100644 index 00000000..e2e5e5b1 --- /dev/null +++ b/include/encoding/pcw.tbl @@ -0,0 +1,57 @@ +NAME=Amstrad-CP/M +EOT=00 + + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7e=`abcdefghijklmnopqrstuvwxyz{|}~ +80-8f=◾︎╧╟╚╤║╔╠╢╝═╩╗╣╦╬ +90-9f=·╵╶└╷│┌├╴┘─┴┐┤┬┼ +a0-af=ªº°£©¶§†¼½¾«»₧¿¡ +b0-bf=ƒ¢¨´ˆ‰⅛⅜⅝⅞ß○•¥®™ +c0-cf=ÁÉÍÓÚÂÊÎÔÛÀÈÌÒÙŸ +d0-df=ÄËÏÖÜÇÆÅØÑÃÕ≥≤≠≃ +e0-ef=áéíóúâêîôûàèìòùÿ +f0-ff=äëïöüçæåøñãõ⇒⇐⇔≡ + +∞=1B00 +⊙=1B01 +Γ=1B02 +Δ=1B03 +⊗=1B04 +×=1B05 +÷=1B06 +∴=1B07 +Π=1B08 +↓=1B09 +Σ=1B0A +←=1B0B +→=1B0C +±=1B0D +↔=1B0E +Ω=1B0F +α=1B10 +β=1B11 +γ=1B12 +δ=1B13 +ε=1B14 +θ=1B15 +λ=1B16 +μ=1B17 +π=1B18 +ρ=1B19 +σ=1B1a +τ=1B1b +φ=1B1c +χ=1B1d +ψ=1B1e +ω=1B1f + +{n}=0d0a +{b}=08 +{q}=22 +{apos}=27 +{lbrace}=7b +{rbrace}=7d +{pi}=1b18 diff --git a/include/encoding/pet.tbl b/include/encoding/pet.tbl new file mode 100644 index 00000000..eb8935dd --- /dev/null +++ b/include/encoding/pet.tbl @@ -0,0 +1 @@ +ALIAS=petscii \ No newline at end of file diff --git a/include/encoding/petjp.tbl b/include/encoding/petjp.tbl new file mode 100644 index 00000000..5f146a58 --- /dev/null +++ b/include/encoding/petjp.tbl @@ -0,0 +1 @@ +ALIAS=petsciijp \ No newline at end of file diff --git a/include/encoding/petscii.tbl b/include/encoding/petscii.tbl new file mode 100644 index 00000000..5711a4e4 --- /dev/null +++ b/include/encoding/petscii.tbl @@ -0,0 +1,35 @@ +NAME=PETSCII +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@abcdefghijklmnopqrstuvwxyz[£]↑← +c0-da=–ABCDEFGHIJKLMNOPQRSTUVWXYZ +de=π + +{n}=0d +{apos}=27 +{q}=22 +{pound}=5c +{pi}=de +{up}=91 +{down}=11 +{left}=9d +{right}=1d +{white}=05 +{black}=90 +{red}=1c +{blue}=1f +{green}=1e +{cyan}=9f +{purple}=9c +{yellow}=9e +{reverse}=12 +{reverseoff}=92 +^=5E +♥=D3 +♡=D3 +♠=C1 +♣=D8 +♢=DA +•=D1 diff --git a/include/encoding/petsciijp.tbl b/include/encoding/petsciijp.tbl new file mode 100644 index 00000000..9861ad5e --- /dev/null +++ b/include/encoding/petsciijp.tbl @@ -0,0 +1,69 @@ +NAME=PETSCII-JP +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑← +a1-a3=円年月 +a6=ヲ +b0-bf=πアイウエオカキクケコサシスセソ +c0-cf=タチツテトナニヌネノハヒフヘホマ +d0-df=ミムメモヤユヨラリルレロワン゛゜ + +{n}=0d +{apos}=27 +{q}=22 +{yen}=5c +{pi}=b0 +{up}=91 +{down}=11 +{left}=9d +{right}=1d +{white}=05 +{black}=90 +{red}=1c +{blue}=1f +{green}=1e +{cyan}=9f +{purple}=9c +{yellow}=9e +{reverse}=12 +{reverseoff}=92 + +\=5C +^=5E +♥=C4 +♡=C4 +♠=C1 +♣=BB +♢=C2 +•=DD +ー=2D +U+ff70=2D +U+ff66=a6 +ヮ=DC +ヵ=B6 +ヶ=B9 +ァ=B1 +U+ff67=B1 +ィ=B2 +U+ff68=B2 +ゥ=B3 +U+ff69=B3 +ェ=B4 +U+ff6a=B4 +ォ=B5 +U+ff6b=B5 +ャ=D4 +U+ff6c=D4 +ュ=D5 +U+ff6d=D5 +ョ=D6 +U+ff6e=D6 +ッ=C2 +U+ff6f=C2 +a-z=41 +ア-ン=B1 +゙=DE +゚=DF +KATAKANA=>DECOMPOSE diff --git a/include/encoding/petscr.tbl b/include/encoding/petscr.tbl new file mode 100644 index 00000000..e0badc64 --- /dev/null +++ b/include/encoding/petscr.tbl @@ -0,0 +1 @@ +ALIAS=cbmscr diff --git a/include/encoding/petscrjp.tbl b/include/encoding/petscrjp.tbl new file mode 100644 index 00000000..50d72f93 --- /dev/null +++ b/include/encoding/petscrjp.tbl @@ -0,0 +1 @@ +ALIAS=cbmscrjp diff --git a/include/encoding/pokemon1de.tbl b/include/encoding/pokemon1de.tbl new file mode 100644 index 00000000..c458c8f6 --- /dev/null +++ b/include/encoding/pokemon1de.tbl @@ -0,0 +1 @@ +ALIAS=pokemon1fr diff --git a/include/encoding/pokemon1en.tbl b/include/encoding/pokemon1en.tbl new file mode 100644 index 00000000..1712d20e --- /dev/null +++ b/include/encoding/pokemon1en.tbl @@ -0,0 +1,50 @@ +NAME=Pokémon Gen. 1 English +EOT=50 +70-75=‘’“”·⋯ +7F=U+0020 +80-8F=ABCDEFGHIJKLMNOP +90-9F=QRSTUVWXYZ():;[] +A0-AF=abcdefghijklmnop +B0-BA=qrstuvwxyzé +E0=' +E3=- +E6-E8=?!. 
+EC-EF=▷▶▼♂ +F0-FF=₽×./,♀0123456789 + +{'d}=BB +{would}=BB +{had}=BB +{'l}=BC +{wil}=BC +{'s}=BD +{is}=BD +{'t}=BE +{ot}=BE +{'v}=BF +{hav}=BF +{'r}=E4 +{ar}=E4 +{'m}=E5 +{am}=E5 +{PK}=E1 +{pk}=E1 +{MN}=E2 +{mn}=E2 +{PC}=5B +{pc}=5B +{TM}=5C +{tm}=5C +{TRAINER}=5D +{trainer}=5D +{ROCKET}=5E +{rocket}=5E +{poke}=54 +{Poke}=54 +{poké}=54 +{Poké}=54 +{pkmn}=4A +{PKMN}=4A +{......}=56 + +{n}=4E diff --git a/include/encoding/pokemon1es.tbl b/include/encoding/pokemon1es.tbl new file mode 100644 index 00000000..209ff539 --- /dev/null +++ b/include/encoding/pokemon1es.tbl @@ -0,0 +1,30 @@ +NAME=Pokémon Gen. 1 Spanish/Italian +EOT=50 +70-75=‘’“”·⋯ +7F=U+0020 +80-8F=ABCDEFGHIJKLMNOP +90-9F=QRSTUVWXYZ():;[] +A0-AF=abcdefghijklmnop +B0-BF=qrstuvwxyzàèéùÀÁ +C0-CF=ÄÖÜäöüÈÉÌÍÑÒÓÙÚá +D0-D7=ìíñòóúº& +E0=' +E3=- +E6-E8=?!. +EC-EF=▷▶▼♂ +F0-FF=₽×./,♀0123456789 + +{'d}=D8 +{'l}=D9 +{'m}=DA +{'r}=DB +{'s}=DC +{'t}=DD +{'v}=DE + +{PK}=E1 +{pk}=E1 +{MN}=E2 +{mn}=E2 + +{n}=4E diff --git a/include/encoding/pokemon1fr.tbl b/include/encoding/pokemon1fr.tbl new file mode 100644 index 00000000..22de6e11 --- /dev/null +++ b/include/encoding/pokemon1fr.tbl @@ -0,0 +1,46 @@ +NAME=Pokémon Gen. 1 French/German +EOT=50 +70-75=‘’“”·⋯ +7F=U+0020 +80-8F=ABCDEFGHIJKLMNOP +90-9F=QRSTUVWXYZ():;[] +A0-AF=abcdefghijklmnop +B0-BF=qrstuvwxyzàèéùßç +C0-CC=ÄÖÜäöüëïâôûêî +E0=' +E3-E4=-+ +E6-E8=?!. 
+EC-EF=▷▶▼♂ +F0-FF=₽×./,♀0123456789 + +{c'}=d4 +{ce}=d4 +{d'}=d5 +{de}=d5 +{j'}=d6 +{je}=d6 +{l'}=d7 +{le}=d7 +{la}=d7 +{m'}=d8 +{me}=d8 +{n'}=d9 +{ne}=d9 +{p'}=da +{s'}=db +{se}=db +{si}=db +{'s}=dc +{es}=dc +{t'}=dd +{te}=dd +{u'}=de +{ue}=de +{y'}=df + +{PK}=E1 +{pk}=E1 +{MN}=E2 +{mn}=E2 + +{n}=4E diff --git a/include/encoding/pokemon1it.tbl b/include/encoding/pokemon1it.tbl new file mode 100644 index 00000000..d3eb9833 --- /dev/null +++ b/include/encoding/pokemon1it.tbl @@ -0,0 +1 @@ +ALIAS=pokemon1es diff --git a/include/encoding/pokemon1jp.tbl b/include/encoding/pokemon1jp.tbl new file mode 100644 index 00000000..14d6884c --- /dev/null +++ b/include/encoding/pokemon1jp.tbl @@ -0,0 +1,26 @@ +NAME=Pokémon Gen. 1 Japanese +EOT=50 +60-6F=ABCDEFGHIVSLM:ぃぅ +70-78=「」『』・…ぁぇぉ +7f=U+3000 +80-8F=アイウエオカキクケコサシスセソタ +90-9f=チツテトナニヌネノハヒフホマミム +a0-af=メモヤユヨラルレロワヲンッャュョ +b0-bf=ィあいうえおかきくけこさしすせそ +c0-cf=たちつてとなにぬねのはひふへほま +d0-df=みむめもやゆよらりるれろわをんっ +e0-ef=ゃゅょー゜゛?!。ァゥェ▷▶▼♂ +f0-ff=円×./ォ♀0123456789 + +U+0020=7f +:=6D +·=74 +が=4a +{n}=4E + +HIRAGANA=>DECOMPOSE +KATAKANA=>DECOMPOSE +{パソコン}=5b +{わざマシン}=5c +{トレーナー}=5d +{ロケットだん}=5e diff --git a/include/encoding/short_koi.tbl b/include/encoding/short_koi.tbl new file mode 100644 index 00000000..2f4ceeb3 --- /dev/null +++ b/include/encoding/short_koi.tbl @@ -0,0 +1 @@ +ALIAS=koi7n2 diff --git a/include/encoding/sinclair.tbl b/include/encoding/sinclair.tbl new file mode 100644 index 00000000..eb9eca4d --- /dev/null +++ b/include/encoding/sinclair.tbl @@ -0,0 +1,39 @@ +NAME=Sinclair +EOT=00 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? 
+40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +60-7f=£abcdefghijklmnopqrstuvwxyz{|}~© + +↑=5e +{n}=0d +{q}=22 +{apos}=27 +{pound}=60 +{lbrace}=7b +{rbrace}=7d +{copy}=7f + +{up}=0b +{down}=0a +{left}=08 +{right}=09 +{white}=1007 +{black}=1008 +{red}=1002 +{blue}=1001 +{green}=1004 +{cyan}=1005 +{purple}=1003 +{yellow}=1006 +{bgwhite}=1107 +{bgblack}=1108 +{bgred}=1102 +{bgblue}=1101 +{bggreen}=1104 +{bgcyan}=1105 +{bgpurple}=1103 +{bgyellow}=1106 +{reverse}=1401 +{reverseoff}=1400 diff --git a/include/encoding/utf16be.tbl b/include/encoding/utf16be.tbl new file mode 100644 index 00000000..a4579a8f --- /dev/null +++ b/include/encoding/utf16be.tbl @@ -0,0 +1,2 @@ +NAME=UTF-16BE +BUILTIN=UTF-16BE \ No newline at end of file diff --git a/include/encoding/utf16le.tbl b/include/encoding/utf16le.tbl new file mode 100644 index 00000000..503713e0 --- /dev/null +++ b/include/encoding/utf16le.tbl @@ -0,0 +1,2 @@ +NAME=UTF-16LE +BUILTIN=UTF-16LE \ No newline at end of file diff --git a/include/encoding/utf8.tbl b/include/encoding/utf8.tbl new file mode 100644 index 00000000..8eaf1386 --- /dev/null +++ b/include/encoding/utf8.tbl @@ -0,0 +1,2 @@ +NAME=UTF-8 +BUILTIN=UTF-8 \ No newline at end of file diff --git a/include/encoding/vectrex.tbl b/include/encoding/vectrex.tbl new file mode 100644 index 00000000..8e8accc8 --- /dev/null +++ b/include/encoding/vectrex.tbl @@ -0,0 +1,14 @@ +NAME=Vectrex +EOT=80 + +20=U+0020 +21-3f=!"#$%&'()*+,-./0123456789:;<=>? +40-5f=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ +61=↑ +63=↓ +67=© +6c=∞ +a-z=41 +{copy}=67 +{q}=22 +{apos}=27 diff --git a/include/encoding/zx80.tbl b/include/encoding/zx80.tbl new file mode 100644 index 00000000..377e3bea --- /dev/null +++ b/include/encoding/zx80.tbl @@ -0,0 +1,18 @@ +NAME=ZX80 +EOT=01 + +00=U+0020 +0c-1b=£$:?()-+*/=><;,. 
+1c-25=0123456789 +26-3f=ABCDEFGHIJKLMNOPQRSTUVWXYZ +d4=" + +{pound}=0c +{q}=d4 +{n}=76 +{b}=77 +{up}=70 +{down}=71 +{left}=72 +{right}=73 +a-z=26 diff --git a/include/encoding/zx81.tbl b/include/encoding/zx81.tbl new file mode 100644 index 00000000..4a0024f3 --- /dev/null +++ b/include/encoding/zx81.tbl @@ -0,0 +1,18 @@ +NAME=ZX81 +EOT=0b + +00=U+0020 +0c-1b=£$:?()><=+-*/;,. +1c-25=0123456789 +26-3f=ABCDEFGHIJKLMNOPQRSTUVWXYZ +c0=" + +{pound}=0c +{q}=c0 +{n}=76 +{b}=77 +{up}=70 +{down}=71 +{left}=72 +{right}=73 +a-z=26 diff --git a/include/platform/cpc464.ini b/include/platform/cpc464.ini index f1dcc876..0eac1309 100644 --- a/include/platform/cpc464.ini +++ b/include/platform/cpc464.ini @@ -1,7 +1,7 @@ ;a single-load Amstrad CPC 464 program [compilation] arch=z80 -encoding=ascii +encoding=cpc_en modules=default_panic,cpc,stdlib [allocation] diff --git a/mkdocs.yml b/mkdocs.yml index 4df2a62b..91fb84ed 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - Literals: lang/literals.md - Predefined constants: lang/predefined_constants.md - Text encodings: lang/text.md + - Custom text encodings: lang/custom-encoding.md - Operators: lang/operators.md - Functions: lang/functions.md - Inline 6502 assembly: lang/assembly.md diff --git a/src/main/scala/millfork/CompilationOptions.scala b/src/main/scala/millfork/CompilationOptions.scala index bd4824e5..d03266d0 100644 --- a/src/main/scala/millfork/CompilationOptions.scala +++ b/src/main/scala/millfork/CompilationOptions.scala @@ -3,6 +3,7 @@ package millfork import millfork.buildinfo.BuildInfo import millfork.compiler.LabelGenerator import millfork.error.{ConsoleLogger, Logger} +import millfork.parser.TextCodecRepository /** * @author Karol Stasiak @@ -12,6 +13,7 @@ case class CompilationOptions(platform: Platform, outputFileName: Option[String], zpRegisterSize: Int, featureOverrides: Map[String, Long], + textCodecRepository: TextCodecRepository, jobContext: JobContext) { import CompilationFlag._ diff --git 
a/src/main/scala/millfork/Main.scala b/src/main/scala/millfork/Main.scala index 673b2c3b..482480d1 100644 --- a/src/main/scala/millfork/Main.scala +++ b/src/main/scala/millfork/Main.scala @@ -17,7 +17,7 @@ import millfork.env.Environment import millfork.error.{ConsoleLogger, Logger} import millfork.node.StandardCallGraph import millfork.output._ -import millfork.parser.{MSourceLoadingQueue, MosSourceLoadingQueue, ZSourceLoadingQueue} +import millfork.parser.{MSourceLoadingQueue, MosSourceLoadingQueue, TextCodecRepository, ZSourceLoadingQueue} @@ -56,11 +56,12 @@ object Main { errorReporting.warn("Failed to detect the default include directory, consider using the -I option") } + val textCodecRepository = new TextCodecRepository("." :: c.includePath) val platform = Platform.lookupPlatformFile("." :: c.includePath, c.platform.getOrElse { errorReporting.info("No platform selected, defaulting to `c64`") "c64" - }) - val options = CompilationOptions(platform, c.flags, c.outputFileName, c.zpRegisterSize.getOrElse(platform.zpRegisterSize), c.features, JobContext(errorReporting, new LabelGenerator)) + }, textCodecRepository) + val options = CompilationOptions(platform, c.flags, c.outputFileName, c.zpRegisterSize.getOrElse(platform.zpRegisterSize), c.features, textCodecRepository, JobContext(errorReporting, new LabelGenerator)) errorReporting.debug("Effective flags: ") options.flags.toSeq.sortBy(_._1).foreach{ case (f, b) => errorReporting.debug(f" $f%-30s : $b%s") diff --git a/src/main/scala/millfork/Platform.scala b/src/main/scala/millfork/Platform.scala index 38cff762..4a8d670f 100644 --- a/src/main/scala/millfork/Platform.scala +++ b/src/main/scala/millfork/Platform.scala @@ -7,7 +7,7 @@ import java.util.Locale import millfork.error.Logger import millfork.output._ -import millfork.parser.{TextCodec, TextCodecWithFlags} +import millfork.parser.{TextCodec, TextCodecRepository, TextCodecWithFlags} import org.apache.commons.configuration2.INIConfiguration /** @@ -64,23 
+64,23 @@ class Platform( object Platform { - def lookupPlatformFile(includePath: List[String], platformName: String)(implicit log: Logger): Platform = { + def lookupPlatformFile(includePath: List[String], platformName: String, textCodecRepository: TextCodecRepository)(implicit log: Logger): Platform = { includePath.foreach { dir => val file = Paths.get(dir, platformName + ".ini").toFile log.debug("Checking " + file) if (file.exists()) { - return load(file) + return load(file, textCodecRepository) } val file2 = Paths.get(dir, "platform", platformName + ".ini").toFile log.debug("Checking " + file2) if (file2.exists()) { - return load(file2) + return load(file2, textCodecRepository) } } log.fatal(s"Platform definition `$platformName` not found", None) } - def load(file: File)(implicit log: Logger): Platform = { + def load(file: File, textCodecRepository: TextCodecRepository)(implicit log: Logger): Platform = { val conf = new INIConfiguration() val bytes = Files.readAllBytes(file.toPath) conf.read(new StringReader(new String(bytes, StandardCharsets.UTF_8))) @@ -131,7 +131,7 @@ object Platform { val codecName = cs.get(classOf[String], "encoding", "ascii") val srcCodecName = cs.get(classOf[String], "screen_encoding", codecName) - val TextCodecWithFlags(codec, czt, clp, _) = TextCodec.forName(codecName, None, log) + val TextCodecWithFlags(codec, czt, clp, _) = textCodecRepository.forName(codecName, None, log) if (czt) { log.error("Default encoding cannot be zero-terminated") } @@ -141,7 +141,7 @@ object Platform { if (codec.stringTerminator.length != 1) { log.warn("Default encoding should be byte-based") } - val TextCodecWithFlags(srcCodec, szt, slp, _) = TextCodec.forName(srcCodecName, None, log) + val TextCodecWithFlags(srcCodec, szt, slp, _) = textCodecRepository.forName(srcCodecName, None, log) if (szt) { log.error("Default screen encoding cannot be zero-terminated") } @@ -323,9 +323,10 @@ object Platform { codec.encodeDigit(c) == srcCodec.encodeDigit(c) }), 
"ENCCONV_SUPPORTED" -> toLong((codec.name, srcCodec.name) match { - case (TextCodec.Petscii.name, TextCodec.CbmScreencodes.name) | - (TextCodec.PetsciiJp.name, TextCodec.CbmScreencodesJp.name) | - (TextCodec.Atascii.name, TextCodec.AtasciiScreencodes.name) => + // TODO: don't rely on names! + case ("PETSCII", "CBM-Screen") | + ("PETSCII-JP", "CBM-Screen-JP") | + ("ATASCII", "ATASCII-Screen") => CpuFamily.forType(cpu) == CpuFamily.M6502 case _ => codec.name == srcCodec.name }), diff --git a/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala b/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala index 3416cb8d..fb255394 100644 --- a/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala +++ b/src/main/scala/millfork/parser/AbstractSourceLoadingQueue.scala @@ -25,11 +25,12 @@ abstract class AbstractSourceLoadingQueue[T](val initialFilenames: List[String], def pseudoModules: List[DeclarationStatement] = { val encodingConversionAliases = (options.platform.defaultCodec.name, options.platform.screenCodec.name) match { - case (TextCodec.Petscii.name, TextCodec.CbmScreencodes.name) | - (TextCodec.PetsciiJp.name, TextCodec.CbmScreencodesJp.name)=> + // TODO: don't rely on names! 
+ case ("PETSCII", "CBM-Screen") | + ("PETSCII-JP", "CBM-Screen-JP") => List(AliasDefinitionStatement("__from_screencode", "petscr_to_petscii", important = false), AliasDefinitionStatement("__to_screencode", "petscii_to_petscr", important = false)) - case (TextCodec.Atascii.name, TextCodec.AtasciiScreencodes.name)=> + case ("ATASCII", "ATASCII-Screen") => List(AliasDefinitionStatement("__from_screencode", "atasciiscr_to_atascii", important = false), AliasDefinitionStatement("__to_screencode", "atascii_to_atasciiscr", important = false)) case _ => Nil diff --git a/src/main/scala/millfork/parser/MfParser.scala b/src/main/scala/millfork/parser/MfParser.scala index 1c5c2de2..73a0f0fb 100644 --- a/src/main/scala/millfork/parser/MfParser.scala +++ b/src/main/scala/millfork/parser/MfParser.scala @@ -109,7 +109,7 @@ abstract class MfParser[T](fileId: String, input: String, currentDirectory: Stri case "scrz" => TextCodecWithFlags(options.platform.screenCodec, nullTerminated = true, lengthPrefixed = false, lenient = lenient) case "pscr" => TextCodecWithFlags(options.platform.screenCodec, nullTerminated = false, lengthPrefixed = true, lenient = lenient) case "pscrz" => TextCodecWithFlags(options.platform.screenCodec, nullTerminated = true, lengthPrefixed = true, lenient = lenient) - case _ => TextCodec.forName(encoding, Some(position), log) + case _ => options.textCodecRepository.forName(encoding, Some(position), log) } } diff --git a/src/main/scala/millfork/parser/TextCodec.scala b/src/main/scala/millfork/parser/TextCodec.scala index 35d877b3..ab0d1729 100644 --- a/src/main/scala/millfork/parser/TextCodec.scala +++ b/src/main/scala/millfork/parser/TextCodec.scala @@ -12,7 +12,7 @@ import millfork.node.Position * @author Karol Stasiak */ -final case class TextCodecWithFlags(code: TextCodec, nullTerminated: Boolean, lengthPrefixed: Boolean, lenient: Boolean) +final case class TextCodecWithFlags(codec: TextCodec, nullTerminated: Boolean, lengthPrefixed: Boolean, lenient: 
Boolean) sealed trait TextCodec { def name: String @@ -44,15 +44,19 @@ class UnicodeTextCodec(override val name: String, val charset: Charset, override "b" -> '\b', "null" -> '\0', "nullchar" -> '\0', + "nbsp" -> '\u00a0', + "shy" -> '\u00ad', "apos" -> '\'', "q" -> '\"', "lbrace" -> '{', "rbrace" -> '}', + "cent" -> '¢', "pound" -> '£', "euro" -> '€', "yen" -> '¥', "pi" -> 'π', - "copy" -> '©' + "copy" -> '©', + "ss" -> 'ß' ) private def encodeEscapeSequence(log: Logger, escSeq: String, position: Option[Position], options: CompilationOptions, lenient: Boolean): List[Int] = { @@ -126,10 +130,11 @@ class UnicodeTextCodec(override val name: String, val charset: Charset, override class TableTextCodec(override val name: String, val stringTerminatorChar: Int, - private val map: String, - private val extra: Map[Char, Int], - private val decompositions: Map[Char, String], - private val escapeSequences: Map[String, List[Int]]) extends TextCodec { + val map: String, + val extra: Map[Char, Int], + val decompositions: Map[Char, String], + val directDecompositions: Map[Char, List[Int]], + val escapeSequences: Map[String, List[Int]]) extends TextCodec { override val stringTerminator: List[Int] = List(stringTerminatorChar) @@ -178,6 +183,8 @@ class TableTextCodec(override val name: String, private def encodeChar(log: Logger, position: Option[Position], c: Char, options: CompilationOptions, lenient: Boolean): Option[List[Int]] = { if (decompositions.contains(c)) { Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, options, lenient).getOrElse(List(x.toInt)))) + } else if (directDecompositions.contains(c)) { + Some(directDecompositions(c)) } else if (extra.contains(c)) Some(List(extra(c))) else { val index = map.indexOf(c) if (index >= 0) { @@ -267,87 +274,10 @@ class TableTextCodec(override val name: String, } object TextCodec { - lazy val allCodecs = Map( - "ascii" -> TextCodec.Ascii, - "petscii" -> TextCodec.Petscii, - "pet" -> TextCodec.Petscii, - 
"petsciijp" -> TextCodec.PetsciiJp, - "petjp" -> TextCodec.PetsciiJp, - "oldpetscii" -> TextCodec.OldPetscii, - "oldpet" -> TextCodec.OldPetscii, - "origpetscii" -> TextCodec.OriginalPetscii, - "origpet" -> TextCodec.OriginalPetscii, - "cbmscr" -> TextCodec.CbmScreencodes, - "petscr" -> TextCodec.CbmScreencodes, - "cbmscrjp" -> TextCodec.CbmScreencodesJp, - "petscrjp" -> TextCodec.CbmScreencodesJp, - "atascii" -> TextCodec.Atascii, - "atari" -> TextCodec.Atascii, - "atasciiscr" -> TextCodec.AtasciiScreencodes, - "atariscr" -> TextCodec.AtasciiScreencodes, - "bbc" -> TextCodec.Bbc, - "sinclair" -> TextCodec.Sinclair, - "apple2" -> TextCodec.Apple2, - "jis" -> TextCodec.Jis, - "jisx" -> TextCodec.Jis, - "iso_de" -> TextCodec.IsoIec646De, - "iso_no" -> TextCodec.IsoIec646No, - "iso_dk" -> TextCodec.IsoIec646No, - "iso_se" -> TextCodec.IsoIec646Se, - "iso_fi" -> TextCodec.IsoIec646Se, - "iso_yu" -> TextCodec.IsoIec646Yu, - "msx_intl" -> TextCodec.MsxWest, - "msx_us" -> TextCodec.MsxWest, - "msx_uk" -> TextCodec.MsxWest, - "msx_de" -> TextCodec.MsxWest, - "msx_fr" -> TextCodec.MsxWest, - "msx_es" -> TextCodec.MsxWest, - "msx_ru" -> TextCodec.MsxRu, - "msx_jp" -> TextCodec.MsxJp, - "msx_br" -> TextCodec.MsxBr, - "vectrex" -> TextCodec.Vectrex, - "koi7n2" -> TextCodec.Koi7N2, - "short_koi" -> TextCodec.Koi7N2, - "zx80" -> TextCodec.Zx80, - "zx81" -> TextCodec.Zx81, - "iso8859_15" -> TextCodec.Iso8859_15, - "latin0" -> TextCodec.Iso8859_15, - "latin9" -> TextCodec.Iso8859_15, - "iso15" -> TextCodec.Iso8859_15, - "utf8" -> TextCodec.Utf8, - "utf16be" -> TextCodec.Utf16Be, - "utf16le" -> TextCodec.Utf16Le, - ) - - def forName(name: String, position: Option[Position], log: Logger): TextCodecWithFlags = { - if (allCodecs.contains(name)) return TextCodecWithFlags(allCodecs(name), nullTerminated = false, lengthPrefixed = false, lenient = false) - if (name.endsWith("z")) { - val cleanName = name.stripSuffix("z") - if (allCodecs.contains(cleanName)) return 
TextCodecWithFlags(allCodecs(cleanName), nullTerminated = true, lengthPrefixed = false, lenient = false) - } - val lengthPrefixed = name.startsWith("p") - if (name.startsWith("p")) { - val cleanName = name.stripPrefix("p") - if (allCodecs.contains(cleanName)) return TextCodecWithFlags(allCodecs(cleanName), nullTerminated = false, lengthPrefixed = true, lenient = false) - - if (cleanName.endsWith("z")) { - val cleanName2 = cleanName.stripSuffix("z") - if (allCodecs.contains(cleanName2)) return TextCodecWithFlags(allCodecs(cleanName2), nullTerminated = true, lengthPrefixed = true, lenient = false) - } - } - log.error(s"Unknown string encoding: `$name`", position) - TextCodecWithFlags(TextCodec.Ascii, nullTerminated = false, lengthPrefixed = false, lenient = false) - } - - private val Utf8 = new UnicodeTextCodec("UTF-8", StandardCharsets.UTF_8, List(0)) - - private val Utf16Be = new UnicodeTextCodec("UTF-16BE", StandardCharsets.UTF_16BE, List(0, 0)) - - private val Utf16Le = new UnicodeTextCodec("UTF-16LE", StandardCharsets.UTF_16LE, List(0, 0)) val NotAChar = '\ufffd' - private lazy val DefaultOverrides: Map[Char, Int] = ('\u2400' to '\u2420').map(c => c->(c.toInt - 0x2400)).toMap + ('\u2421' -> 127) + lazy val DefaultOverrides: Map[Char, Int] = ('\u2400' to '\u2420').map(c => c->(c.toInt - 0x2400)).toMap + ('\u2421' -> 127) //noinspection ScalaUnusedSymbol private lazy val AsciiEscapeSequences: Map[String, List[Int]] = Map( @@ -371,431 +301,19 @@ object TextCodec { "lbrace" -> List('{'.toInt), "rbrace" -> List('}'.toInt)) - private lazy val StandardKatakanaDecompositions: Map[Char, String] = { + lazy val StandardKatakanaDecompositions: Map[Char, String] = { (("カキクケコサシスセソタチツテトハヒフヘホ")).zip( "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++ "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap } - private lazy val StandardHiraganaDecompositions: Map[Char, String] = { + lazy val StandardHiraganaDecompositions: Map[Char, String] = { 
(("かきくけこさしすせそたちつてとはひふへほ")).zip( "がぎぐげござじずぜぞだぢづでどばびぶべぼ").map { case (u, v) => v -> (u + "゛") }.toMap ++ "はひふへほ".zip("ぱぴぷぺぽ").map { case (h, p) => p -> (h + "゜") }.toMap } - lazy val Ascii = new TableTextCodec("ASCII", 0, 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, AsciiEscapeSequences) - - lazy val Iso8859_15 = new TableTextCodec("ISO 8859-15", 0, - "\ufffd" * 32 + - 32.until(127).map { i => i.toChar }.mkString + - "\ufffd" + - "\ufffd" * 32 + - "\ufffd¡¢£€¥Š§š©ª«¬\ufffd®¯" + - "°±²³Žµ¶·ž¹º»ŒœŸ¿" + - "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ" + - "ÐÑÒÓÔÕÖרÙÚÛÜÝÞß" + - "àáâãäåæçèéêëìíîï" + - "ðñòóôõö÷øùúûüýþÿ", - Map.empty, Map.empty, AsciiEscapeSequences ++ Map( - "cent" -> List(0xA2), - "pound" -> List(0xA3), - "euro" -> List(0xA4), - "yen" -> List(0xA5), - "copy" -> List(0xA9), - ) - ) - - lazy val Apple2 = new TableTextCodec("APPLE-II", 0, 0.until(255).map { i => - if (i < 0xa0) NotAChar - else if (i < 0xe0) (i - 128).toChar - else NotAChar - }.mkString, - ('a' to 'z').map(l => l -> (l - 'a' + 0xC1)).toMap, Map.empty, MinimalEscapeSequencesWithBraces) - - lazy val IsoIec646De = new TableTextCodec("ISO-IEC-646-DE", 0, - "\ufffd" * 32 + - " !\"#$%^'()*+,-./0123456789:;<=>?" + - "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" + - "`abcdefghijklmnopqrstuvwxyzäöüß", - DefaultOverrides, Map.empty, AsciiEscapeSequences ++ Map( - "UE" -> List('['.toInt), - "OE" -> List('\\'.toInt), - "AE" -> List(']'.toInt), - "ue" -> List('{'.toInt), - "oe" -> List('|'.toInt), - "ae" -> List('}'.toInt), - "ss" -> List('~'.toInt) - ) - ) - - lazy val IsoIec646Se = new TableTextCodec("ISO-IEC-646-SE", 0, - "\ufffd" * 32 + - " !\"#¤%^'()*+,-./0123456789:;<=>?" 
+ - "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" + - "`abcdefghijklmnopqrstuvwxyzäöå~", - Map('¯' -> '~'.toInt, - '‾' -> '~'.toInt, - 'É' -> '@'.toInt, - 'é' -> '`'.toInt, - 'Ü' -> '^'.toInt, - 'ü' -> '~'.toInt, - '$' -> '¤'.toInt), - Map.empty, AsciiEscapeSequences ++ Map( - "AE" -> List('['.toInt), - "OE" -> List('\\'.toInt), - "AA" -> List(']'.toInt), - "ae" -> List('{'.toInt), - "oe" -> List('|'.toInt), - "aa" -> List('}'.toInt) - ) - ) - - lazy val IsoIec646No = new TableTextCodec("ISO-IEC-646-NO", 0, - "\ufffd" * 32 + - " !\"#$%^'()*+,-./0123456789:;<=>?" + - "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" + - "`abcdefghijklmnopqrstuvwxyzæøå~", - Map('¯' -> '~'.toInt, - '‾' -> '~'.toInt, - '|' -> '~'.toInt, - '¤' -> '$'.toInt, - 'Ä' -> '@'.toInt, - 'ä' -> '`'.toInt, - 'Ü' -> '^'.toInt, - 'ü' -> '~'.toInt, - '«' -> '"'.toInt, - '»' -> '"'.toInt, - '§' -> '#'.toInt), - Map.empty, AsciiEscapeSequences ++ Map( - "AE" -> List('['.toInt), - "OE" -> List('\\'.toInt), - "AA" -> List(']'.toInt), - "ae" -> List('{'.toInt), - "oe" -> List('|'.toInt), - "aa" -> List('}'.toInt) - ) - ) - - - lazy val IsoIec646Yu = new TableTextCodec("ISO-IEC-646-YU", 0, - "\ufffd" * 32 + - " !\"#$%^'()*+,-./0123456789:;<=>?" 
+ - "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" + - "žabcdefghijklmnopqrstuvwxyzšđćč", - Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt), - Map.empty, AsciiEscapeSequences) - - val CbmScreencodes = new TableTextCodec("CBM-Screen", 0xE0, - "@abcdefghijklmnopqrstuvwxyz[£]↑←" + - 0x20.to(0x3f).map(_.toChar).mkString + - "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", - Map('^' -> 0x1E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51), - Map.empty, MinimalEscapeSequencesWithoutBraces ++ Map( - "pound" -> List(0x1c), - "pi" -> List(0x5f), - ) - ) - - lazy val CbmScreencodesJp = new TableTextCodec("CBM-Screen-JP", 0xE0, - "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑←" + // 00-1f - 0x20.to(0x3f).map(_.toChar).mkString + - "タチツテトナニヌネノハヒフヘホマ" + // 40-4f - "ミムメモヤユヨラリルレロワン゛゜" + // 50-5f - "\ufffd円年月\ufffd\ufffdヲ\ufffd" + // 60-67 - "πアイウエオカキクケコサシスセソ" + // 70-7f - "", - Map('^' -> 0x1E, '\\' -> 0x1C, - '♥' -> 0x44, '♡' -> 0x44, '♠' -> 0x41, '♣' -> 0x7B, '♢' -> 0x42, '•' -> 0x5D, - 'ー' -> '-'.toInt, 0xff70.toChar -> '-'.toInt, 0xff66.toChar -> 0x66, - 'ヮ' -> 0x5C, 'ヵ' -> 0x76, 'ヶ' -> 0x79, - 'ァ' -> 0x71, 0xff67.toChar -> 0x71, - 'ィ' -> 0x72, 0xff68.toChar -> 0x72, - 'ゥ' -> 0x73, 0xff69.toChar -> 0x73, - 'ェ' -> 0x74, 0xff6a.toChar -> 0x74, - 'ォ' -> 0x75, 0xff6b.toChar -> 0x75, - 'ャ' -> 0x54, 0xff6c.toChar -> 0x54, - 'ュ' -> 0x55, 0xff6d.toChar -> 0x55, - 'ョ' -> 0x56, 0xff6e.toChar -> 0x56, - 'ッ' -> 0x42, 0xff6f.toChar -> 0x42 - ) ++ - ('a' to 'z').map(l => l -> (l - 'a' + 1)) ++ - (1 to 0xf).map(i => (i + 0xff70).toChar -> (i + 0x70)) ++ - (0x10 to 0x2f).map(i => (i + 0xff70).toChar -> (i + 0x40)), - StandardKatakanaDecompositions, MinimalEscapeSequencesWithoutBraces ++ Map( - "pi" -> List(0x70), - "yen" -> List(0x1c), - ) - ) - - lazy val Petscii = new TableTextCodec("PETSCII", 0, - "\ufffd" * 32 + - 0x20.to(0x3f).map(_.toChar).mkString + - "@abcdefghijklmnopqrstuvwxyz[£]↑←" + - "\ufffd" * 32 + // 60-7f - "\ufffd" * 32 + // 80-9f - "\ufffd" * 32 + // a0-bf - 
"–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", // c0-df - Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xD8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map( - "n" -> List(13), - "q" -> List('\"'.toInt), - "pound" -> List(0x5c), - "pi" -> List(0xdf), - "apos" -> List('\''.toInt), - "up" -> List(0x91), - "down" -> List(0x11), - "left" -> List(0x9d), - "right" -> List(0x1d), - "white" -> List(5), - "black" -> List(0x90), - "red" -> List(0x1c), - "blue" -> List(0x1f), - "green" -> List(0x1e), - "cyan" -> List(0x9f), - "purple" -> List(0x9c), - "yellow" -> List(0x9e), - "reverse" -> List(0x12), - "reverseoff" -> List(0x92) - ) - ) - - lazy val PetsciiJp = new TableTextCodec("PETSCII-JP", 0, - "\ufffd" * 32 + - 0x20.to(0x3f).map(_.toChar).mkString + - "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑←" + - "\ufffd" * 32 + // 60-7f - "\ufffd" * 32 + // 80-9f - "\ufffd円年月\ufffd\ufffdヲ\ufffd" + // a0-a7 - "\ufffd" * 8 + // a8-af - "πアイウエオカキクケコサシスセソ" + // b0-bf - "タチツテトナニヌネノハヒフヘホマ" + // c0-cf - "ミムメモヤユヨラリルレロワン゛゜", // d0-df - Map('^' -> 0x5E, '\\' -> 0x5C, - '♥' -> 0xC4, '♡' -> 0x73, '♠' -> 0xC1, '♣' -> 0xBB, '♢' -> 0xC2, '•' -> 0xDD, - 'ー' -> '-'.toInt, 0xff70.toChar -> '-'.toInt, 0xff66.toChar -> 0xa6, - 'ヮ' -> 0xDC, 'ヵ' -> 0xB6, 'ヶ' -> 0xB9, - 'ァ' -> 0xB1, 0xff67.toChar -> 0xB1, - 'ィ' -> 0xB2, 0xff68.toChar -> 0xB2, - 'ゥ' -> 0xB3, 0xff69.toChar -> 0xB3, - 'ェ' -> 0xB4, 0xff6a.toChar -> 0xB4, - 'ォ' -> 0xB5, 0xff6b.toChar -> 0xB5, - 'ャ' -> 0xD4, 0xff6c.toChar -> 0xD4, - 'ュ' -> 0xD5, 0xff6d.toChar -> 0xD5, - 'ョ' -> 0xD6, 0xff6e.toChar -> 0xD6, - 'ッ' -> 0xC2, 0xff6f.toChar -> 0xC2) ++ - ('a' to 'z').map(l => l -> l.toUpper.toInt) ++ - (1 to 0x2f).map(i => (i+0xff70).toChar -> (i+0xb0)), - StandardKatakanaDecompositions, Map( - "n" -> List(13), - "q" -> List('\"'.toInt), - "apos" -> List('\''.toInt), - "yen" -> List(0x5c), - "pi" -> List(0xb0), - "up" -> List(0x91), - "down" -> List(0x11), - "left" -> List(0x9d), - "right" -> List(0x1d), - "white" -> List(5), - "black" -> 
List(0x90), - "red" -> List(0x1c), - "blue" -> List(0x1f), - "green" -> List(0x1e), - "cyan" -> List(0x9f), - "purple" -> List(0x9c), - "yellow" -> List(0x9e), - "reverse" -> List(0x12), - "reverseoff" -> List(0x92) - ) - ) - - lazy val Vectrex = new TableTextCodec("Vectrex", 0x80, - "\ufffd" * 32 + - 0x20.to(0x3f).map(_.toChar).mkString + - "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_" + - "\ufffd↑\ufffd↓\ufffd\ufffd\ufffd©\ufffd\ufffd\ufffd\ufffd∞", - ('a' to 'z').map(l => l -> l.toUpper.toInt).toMap, - Map.empty, Map( - "copy" -> List('g'.toInt) - ) - ) - - lazy val Koi7N2 = new TableTextCodec("KOI-7 N2", 0, - "\ufffd" * 32 + - " !\"#¤%&'()*+,-./" + - "0123456789:;<=>?" + - "@ABCDEFGHIJKLMNO" + - "PQRSTUVWXYZ[\\]^_" + - "ЮАБЦДЕФГХИЙКЛМНО" + - "ПЯРСТУЖВЬЫЗШЭЩЧ", - Map('↑' -> 0x5E, '$' -> 0x24) ++ - ('a' to 'z').map(l => l -> l.toUpper.toInt).toMap ++ - ('а' to 'я').filter(_ != 'ъ').map(l => l -> l.toUpper.toInt).toMap, - Map.empty, Map( - "n" -> List(13), // TODO: ? - "b" -> List(8), // TODO: ? 
- "q" -> List('\"'.toInt), - "apos" -> List('\''.toInt) - ) - ) - - lazy val OldPetscii = new TableTextCodec("Old PETSCII", 0, - "\ufffd" * 32 + - 0x20.to(0x3f).map(_.toChar).mkString + - "@abcdefghijklmnopqrstuvwxyz[\\]↑←" + - "\ufffd" * 32 + - "\ufffd" * 32 + - "\ufffd" * 32 + - "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", - Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xC8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map( - "n" -> List(13), - "q" -> List('\"'.toInt), - "pi" -> List(0xdf), - "apos" -> List('\''.toInt), - "up" -> List(0x91), - "down" -> List(0x11), - "left" -> List(0x9d), - "right" -> List(0x1d), - "reverse" -> List(0x12), - "reverseoff" -> List(0x92) - ) - ) - - lazy val OriginalPetscii = new TableTextCodec("Original PETSCII", 0, - "\ufffd" * 32 + - 0x20.to(0x3f).map(_.toChar).mkString + - "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]↑←" + - "\ufffd" * 32 + - "\ufffd" * 32 + - "\ufffd" * 32 + - "–abcdefghijklmnopqrstuvwxyz\ufffd\ufffd\ufffdπ", - Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xC8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map( - "n" -> List(13), - "q" -> List('\"'.toInt), - "apos" -> List('\''.toInt), - "pi" -> List(0xdf), - "up" -> List(0x91), - "down" -> List(0x11), - "left" -> List(0x9d), - "right" -> List(0x1d), - "reverse" -> List(0x12), - "reverseoff" -> List(0x92) - ) - ) - - lazy val Atascii = new TableTextCodec("ATASCII", 0, - "♡" + - "\ufffd" * 15 + - "♣\ufffd–\ufffd•" + - "\ufffd" * 11 + - 0x20.to(0x5f).map(_.toChar).mkString + - "♢abcdefghijklmnopqrstuvwxyz♠|", - Map('♥' -> 0, '·' -> 0x14), Map.empty, MinimalEscapeSequencesWithoutBraces ++ Seq( - "n" -> List(0x9b), - "up" -> List(0x1c), - "down" -> List(0x1d), - "left" -> List(0x1e), - "right" -> List(0x1f), - "b" -> List(0x7e), - ) - ) - - lazy val AtasciiScreencodes = new TableTextCodec("ATASCII-Screen", 0xDB, - 0x20.to(0x3f).map(_.toChar).mkString + - 0x40.to(0x5f).map(_.toChar).mkString + - "♡" + - "\ufffd" * 15 + - "♣\ufffd–\ufffd•" + - "\ufffd" * 
7 + "↑↓←→"+ - "♢abcdefghijklmnopqrstuvwxyz♠|", - Map('♥' -> 0x40, '·' -> 0x54), Map.empty, MinimalEscapeSequencesWithoutBraces - ) - - lazy val Bbc = new TableTextCodec("BBC", 0, - "\ufffd" * 32 + - 0x20.to(0x5f).map(_.toChar).mkString + - "£" + 0x61.to(0x7E).map(_.toChar).mkString + "©", - Map('↑' -> '^'.toInt), Map.empty, MinimalEscapeSequencesWithBraces ++ Map( - "n" -> List(13), - "pound" -> List(0x60), - "copy" -> List(0x7f), - ) - ) - - lazy val Sinclair = new TableTextCodec("Sinclair", 0, - "\ufffd" * 32 + - 0x20.to(0x5f).map(_.toChar).mkString + - "£" + 0x61.to(0x7E).map(_.toChar).mkString + "©", - Map('↑' -> '^'.toInt), Map.empty, Map( - "n" -> List(13), - "q" -> List('\"'.toInt), - "apos" -> List('\''.toInt), - "pound" -> List(0x60), - "copy" -> List(0x7f), - "lbrace" -> List('{'.toInt), - "rbrace" -> List('}'.toInt), - "up" -> List(11), - "down" -> List(10), - "left" -> List(8), - "right" -> List(9), - "white" -> List(0x10, 7), - "black" -> List(0x10, 8), - "red" -> List(0x10, 2), - "blue" -> List(0x10, 1), - "green" -> List(0x10, 4), - "cyan" -> List(0x10, 5), - "purple" -> List(0x10, 3), - "yellow" -> List(0x10, 6), - "bgwhite" -> List(0x11, 7), - "bgblack" -> List(0x11, 8), - "bgred" -> List(0x11, 2), - "bgblue" -> List(0x11, 1), - "bggreen" -> List(0x11, 4), - "bgcyan" -> List(0x11, 5), - "bgpurple" -> List(0x11, 3), - "bgyellow" -> List(0x11, 6), - "reverse" -> List(0x14, 1), - "reverseoff" -> List(0x14, 0) - ) - ) - - lazy val Zx80 = new TableTextCodec("ZX80", 1, - " \ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd" + - "£$:?()-+*/=><;,." 
+ - "0123456789" + - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + - "\ufffd" * (9 * 16) + - "\ufffd\ufffd\ufffd\ufffd\"", - ('a' to 'z').map(l => l -> (l - 'a' + 0x26)).toMap, - Map.empty, Map( - "pound" -> List(0x0c), - "q" -> List(0xd4), - "apos" -> List(212), - "n" -> List(0x76), - "b" -> List(0x77), - "up" -> List(0x70), - "down" -> List(0x71), - "left" -> List(0x72), - "right" -> List(0x73), - ) - ) - - lazy val Zx81 = new TableTextCodec("ZX81", 11, - " \ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd" + - "£$:?()><=+-*/;,." + - "0123456789" + - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + - "\ufffd" * (8 * 16) + - "\"", - ('a' to 'z').map(l => l -> (l - 'a' + 0x26)).toMap, - Map.empty, Map( - "pound" -> List(0x0c), - "q" -> List(0xc0), - "n" -> List(0x76), - "b" -> List(0x77), - "up" -> List(0x70), - "down" -> List(0x71), - "left" -> List(0x72), - "right" -> List(0x73), - ) - ) + // The only built-in encoding: + lazy val Ascii = new TableTextCodec("ASCII", 0, 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, Map.empty, AsciiEscapeSequences) private val jisHalfwidthKatakanaOrder: String = "\ufffd。「」、・ヲァィゥェォャュョッ" + @@ -803,147 +321,6 @@ object TextCodec { "タチツテトナニヌネノハヒフヘホマ" + "ミムメモヤユヨラリルレロワン゛゜" - //noinspection ScalaUnnecessaryParentheses - lazy val Jis = new TableTextCodec("JIS-X-0201", 0, - "\ufffd" * 32 + - ' '.to('Z').mkString + - "[¥]^_" + - "`" + 'a'.to('z').mkString + "{|}~\ufffd" + - "\ufffd" * 32 + - jisHalfwidthKatakanaOrder + - "\ufffd" * 8 + - "♠♡♢♣" + - "\ufffd" * 4 + - "円年月日時分秒" + - "\ufffd" * 3 + "\\", - Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++ - 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap, - StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces ++ Map( - "n" -> List(13, 10), - "yen" -> List(0x5c) - ) - ) - - lazy val MsxWest = new TableTextCodec("MSX-International", 0, - "\ufffd" * 32 + - (0x20 to 0x7e).map(_.toChar).mkString("") + - "\ufffd" + - "ÇüéâäàåçêëèïîìÄÅ" + - 
"ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" + - "áíóúñѪº¿⌐¬½¼¡«»" + - "ÃãĨĩÕõŨũIJij¾\ufffd\ufffd‰¶§" + - "\ufffd" * 24 + - "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + - "αβΓΠΣσµγΦθΩδ∞∅∈∩" + - "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²", - Map('ß' -> 0xE1, '¦' -> 0x7C, 'Ő' -> 0xB4, 'ő' -> 0xB5, 'Ű' -> 0xB6, 'ű' -> 0xB7), - Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G") , - MinimalEscapeSequencesWithBraces ++ Map( - "right" -> List(0x1c), - "left" -> List(0x1d), - "up" -> List(0x1e), - "down" -> List(0x1f), - "b" -> List(8), - "n" -> List(13, 10), - "pound" -> List(0x9c), - "yen" -> List(0x9d), - ) - ) - - lazy val MsxBr = new TableTextCodec("MSX-BR", 0, - "\ufffd" * 32 + - (0x20 to 0x7e).map(_.toChar).mkString("") + - "\ufffd" + - "ÇüéâÁà\ufffdçêÍÓÚÂÊÔÀ" + - "ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" + - "áíóúñѪº¿⌐¬½¼¡«»" + - "ÃãĨĩÕõŨũIJij¾\ufffd\ufffd‰¶§" + - "\ufffd" * 24 + - "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + - "αβΓΠΣσµγΦθΩδ∞∅∈∩" + - "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²", - Map('ß' -> 0xE1, '¦' -> 0x7C, 'Ő' -> 0xB4, 'ő' -> 0xB5, 'Ű' -> 0xB6, 'ű' -> 0xB7), - Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G") , - MinimalEscapeSequencesWithBraces ++ Map( - "right" -> List(0x1c), - "left" -> List(0x1d), - "up" -> List(0x1e), - "down" -> List(0x1f), - "n" -> List(13, 10), - "b" -> List(8), - "pound" -> List(0x9c), - "yen" -> List(0x9d), - ) - ) - - lazy val MsxRu = new TableTextCodec("MSX-RU", 0, - "\ufffd" * 32 + - (0x20 to 0x7e).map(_.toChar).mkString("") + - "\ufffd" + - "\ufffd" * 16 + - "\ufffd" * 8 + - "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + - "αβΓΠΣσµγΦθΩδ∞∅∈∩" + - "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd¤" + - "юабцдефгхийклмнопярстужвьызшэщчъ" + - "ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩ", - Map('ß' -> 0xA1, '¦' -> 0x7C), - Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> 
"\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G"), - MinimalEscapeSequencesWithBraces ++ Map( - "right" -> List(0x1c), - "left" -> List(0x1d), - "up" -> List(0x1e), - "down" -> List(0x1f), - "b" -> List(8), - "n" -> List(13, 10) - ) - ) - - lazy val MsxJp = new TableTextCodec("MSX-JP", 0, - "\ufffd" * 32 + - (0x20 to 0x7e).map(c => if (c == 0x5c) '¥' else c.toChar).mkString("") + - "\ufffd" + - "♠♡♣♢\uffdd·をぁぃぅぇぉゃゅょっ" + - " あいうえおかきくけこさしすせそ" + - jisHalfwidthKatakanaOrder + - "たちつてとなにぬねのはひふへほま" + - "みむめもやゆよらりるれろわん" + - "" + - "", - Map('♥' -> 0x81, '¦' -> 0x7C) ++ - 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap, - Map( - '月' -> "\u0001A", - '火' -> "\u0001B", - '水' -> "\u0001C", - '木' -> "\u0001D", - '金' -> "\u0001E", - '土' -> "\u0001F", - '日' -> "\u0001G", - '年' -> "\u0001H", - '円' -> "\u0001I", - '時' -> "\u0001J", - '分' -> "\u0001K", - '秒' -> "\u0001L", - '百' -> "\u0001M", - '千' -> "\u0001N", - '万' -> "\u0001O", - '大' -> "\u0001]", - '中' -> "\u0001^", - '小' -> "\u0001_" - ) ++ - StandardHiraganaDecompositions ++ StandardKatakanaDecompositions, - MinimalEscapeSequencesWithBraces ++ Map( - "right" -> List(0x1c), - "left" -> List(0x1d), - "up" -> List(0x1e), - "down" -> List(0x1f), - "b" -> List(8), - "n" -> List(13, 10), - "yen" -> List(0x5c) - ) - ) - lazy val lossyAlternatives: Map[Char, List[String]] = { val allowLowercase: Map[Char, List[String]] = ('A' to 'Z').map(c => c -> List(c.toString.toLowerCase(Locale.ROOT))).toMap val allowUppercase: Map[Char, List[String]] = ('a' to 'z').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap diff --git a/src/main/scala/millfork/parser/TextCodecRepository.scala b/src/main/scala/millfork/parser/TextCodecRepository.scala new file mode 100644 index 00000000..d1aa2562 --- /dev/null +++ b/src/main/scala/millfork/parser/TextCodecRepository.scala @@ -0,0 +1,234 @@ +package millfork.parser + +import java.nio.charset.StandardCharsets +import java.nio.file.{Files, Paths} +import java.util.Locale + +import 
millfork.error.Logger +import millfork.node.Position + +import scala.collection.mutable +import scala.collection.convert.ImplicitConversionsToScala._ +import scala.util.matching.Regex + +/** + * @author Karol Stasiak + */ +class TextCodecRepository(val includePath: List[String]) { + private var cache: mutable.Map[String, Option[TextCodec]] = mutable.Map() + + private def parse(shortname: String, lines: Seq[String], log: Logger): Option[TextCodec] = { + import TextCodecRepository.{COMMENT, LINE, SINGLEHEX, HEXRANGE, UNICODECODEPOINT, CHAR, ESCAPE, HEXSTRING, DEPRECATED} + val kvs = lines.flatMap{ + case LINE(k,v) => Some(k, v) + case COMMENT() => None + case DEPRECATED(msg) => + log.warn(s"Encoding $shortname is deprecated: $msg") + None + case line => + log.error(s"Unexpected line in encoding $shortname: $line") + None + } + if (kvs.length == 1 && kvs.head._1 == "ALIAS") { + val actualName = kvs.head._2 + load(actualName, log) + return cache(actualName) + } + var name = "" + var builtin = "" + var terminator = -1 + val map = Array.fill[Char](256)('\ufffd') + val extras = mutable.Map[Char, Int]() + val decompositions = mutable.Map[Char, String]() + val directDecompositions = mutable.Map[Char, List[Int]]() + val escapeSequences = mutable.Map[String, List[Int]]() + + def hexToInt(h: String): Int = Integer.parseInt(h, 16) + + def hexToInts(h: String): List[Int] = if (h.length % 2 == 0) { + h.grouped(2).map(b => Integer.parseInt(b, 16)).toList + } else { + log.error(s"Odd number of hex digits in encoding $shortname: $h") + Nil + } + + def putToMap(ix: Int, c: Char): Unit = { + if (map(ix) != '\ufffd') log.error(s"Multiple characters in encoding $shortname defined for 0x${ix.toHexString}") + map(ix) = c + } + + kvs.foreach{ + case ("NAME", n) => + if (name != "") log.error(s"Encoding $shortname has multiple names") + name = n + case ("BUILTIN", b) => + if (builtin != "") log.error(s"Encoding $shortname refers to multiple built-ins") + builtin = b.toUpperCase(Locale.ROOT) 
+ case ("ALIAS", n) => log.error(s"ALIAS encoding $shortname cannot contain any other entries") + case ("EOT", SINGLEHEX(h)) => + if (terminator != -1) log.error(s"Encoding $shortname has multiple string terminators") + terminator = hexToInt(h) + case ("EOT", s) => log.error(s"Invalid string terminator in encoding $shortname: $s") + case (SINGLEHEX(h), UNICODECODEPOINT(u)) => + val c = hexToInt(u) + if (c > 0xffff) log.error(s"Invalid astral character $h=U+$u in encoding $shortname") + else putToMap(hexToInt(h), c.toChar) + case (SINGLEHEX(h), CHAR(c)) => putToMap(hexToInt(h), c.head) + case (SINGLEHEX(h), s) => log.error(s"Invalid character $h=$s in encoding $shortname") + case (range@HEXRANGE(f, t), cs) => + val from = hexToInt(f) + val to = hexToInt(t) + if (cs.length != to - from + 1) log.error(s"Mismatched range length and character count for $range in encoding $shortname") + for (i <- 0 until ((to - from + 1) min cs.length)) { + putToMap(from + i, cs(i)) + } + case (range@HEXRANGE(_, _), s) => log.error(s"Invalid character range $range=$s in encoding $shortname") + case (ESCAPE(e), HEXSTRING(h)) => escapeSequences(e) = hexToInts(h) + case (ESCAPE(e), s) => log.error(s"Invalid escape sequence {$e}=$s in encoding $shortname") + + case (CHAR(e), HEXSTRING(h)) => directDecompositions(e.head) = hexToInts(h) + case (UNICODECODEPOINT(u), HEXSTRING(h)) => + val c = hexToInt(u) + if (c > 0xffff) log.error(s"Invalid astral character U+$u=$h in encoding $shortname") + else directDecompositions(c.toChar) = hexToInts(h) + + case (CHAR(e), SINGLEHEX(h)) => extras(e.head) = hexToInt(h) + case (UNICODECODEPOINT(u), SINGLEHEX(h)) => + val c = hexToInt(u) + if (c > 0xffff) log.error(s"Invalid astral character U+$u=$h in encoding $shortname") + else extras(c.toChar) = hexToInt(h) + + case (CHAR(e), s) if (s.startsWith(">")) => + if (s.length == 1) log.error(s"Empty decomposition $e=$s in encoding $shortname") + decompositions(e.head) = s.tail + case (UNICODECODEPOINT(u), s) if 
(s.startsWith(">")) => + if (s.length == 1) log.error(s"Empty decomposition U+$u=$s in encoding $shortname") + val c = hexToInt(u) + if (c > 0xffff) log.error(s"Invalid astral character U+$u=$s in encoding $shortname") + else decompositions(c.toChar) = s.tail + case ("KATAKANA", ">DECOMPOSE" | "DECOMPOSE") => + decompositions ++= TextCodec.StandardKatakanaDecompositions + case ("HIRAGANA", ">DECOMPOSE" | "DECOMPOSE") => + decompositions ++= TextCodec.StandardHiraganaDecompositions + case ("a-z", SINGLEHEX(h)) => + val start = hexToInt(h) + for(c <- 'a' to 'z') { + extras(c) = start + c.toInt - 'a'.toInt + } + case ("ю-ч", SINGLEHEX(h)) => + val start = hexToInt(h) + val koi="юабцдефгхийклмнопярстужвьызшэщч" + for(ix <- 0 until koi.length) { + extras(koi(ix)) = start + ix + } + case ("ア-ン", SINGLEHEX(h)) => + val start = hexToInt(h) + for(c <- 'ア' to 'ン') { + extras(c) = start + c.toInt - 'ア'.toInt + } + case ("ア-ソ", SINGLEHEX(h)) => + val start = hexToInt(h) + for(c <- 'ア' to 'ソ') { + extras(c) = start + c.toInt - 'ア'.toInt + } + case ("タ-ン", SINGLEHEX(h)) => + val start = hexToInt(h) + for(c <- 'タ' to 'ン') { + extras(c) = start + c.toInt - 'タ'.toInt + } + case (k, v) => log.error(s"Invalid command $k=$v in encoding $shortname") + } + if (name == "") { + log.error(s"Nameless encoding $shortname") + } + if (builtin != "") { + if (terminator != -1) log.error(s"Cannot redefine EOT for built-in encoding $builtin in encoding $shortname") + if (decompositions.nonEmpty) log.error(s"Cannot redefine decompositions for built-in encoding $builtin in encoding $shortname") + if (extras.nonEmpty) log.error(s"Cannot redefine extras for built-in encoding $builtin in encoding $shortname") + if (escapeSequences.nonEmpty) log.error(s"Cannot redefine escape sequences for built-in encoding $builtin in encoding $shortname") + if (!map.forall(_ == '\ufffd')) log.error(s"Cannot redefine characters for built-in encoding $builtin in encoding $shortname") + } + builtin match { + case "" => + 
if (terminator == -1) { + log.error(s"Undefined EOT for encoding $shortname") + } + for (d <- '0' to '9') { + if (map.indexOf(d) < 0) log.warn(s"Missing digit $d in encoding $shortname") + } + Some(new TableTextCodec(name, terminator, map.mkString, extras.toMap, decompositions.toMap, directDecompositions.toMap, escapeSequences.toMap)) + case "UTF-8" => Some(TextCodecRepository.Utf8) + case "UTF-16LE" => Some(TextCodecRepository.Utf16Le) + case "UTF-16BE" => Some(TextCodecRepository.Utf16Be) + case _ => + log.error(s"Unknown built-in encoding $builtin for encoding $shortname") + None + } + } + + private def lookupFile(filename: String, log: Logger): Option[String] = { /* Returns the absolute path of the first <dir>/<filename>.tbl or <dir>/encoding/<filename>.tbl found on includePath, or None. */ + includePath.foreach { dir => + val file = Paths.get(dir, filename + ".tbl").toFile + log.trace("Checking " + file) + if (file.exists()) { + return Some(file.getAbsolutePath) + } + val file2 = Paths.get(dir, "encoding", filename + ".tbl").toFile + log.trace("Checking " + file2) + if (file2.exists()) { + return Some(file2.getAbsolutePath) + } + } + log.trace(s"Encoding `$filename` not found") + None + } + + private def load(name: String, log: Logger): Unit = { /* Parses the table for `name` at most once and caches the outcome; None is cached when the file is missing or invalid. */ + if (cache.contains(name)) return + cache(name) = None /* placeholder: parse() calls load() again for ALIAS targets, so a self-referential or cyclic alias chain must hit the guard above instead of recursing until the stack overflows */; cache(name) = lookupFile(name, log).flatMap(f => { + val lines = Files.readAllLines(Paths.get(f), StandardCharsets.UTF_8).toIndexedSeq + parse(name, lines, log) + }) + } + + def forName(name: String, position: Option[Position], log: Logger): TextCodecWithFlags = { + load(name, log) + load(name.stripSuffix("z"), log) + load(name.stripPrefix("p"), log) + load(name.stripSuffix("z").stripPrefix("p"), log) + cache(name) foreach(c => return TextCodecWithFlags(c, nullTerminated = false, lengthPrefixed = false, lenient = false)) + if (name.endsWith("z")) { + val cleanName = name.stripSuffix("z") + cache(cleanName) foreach(c => return TextCodecWithFlags(c, nullTerminated = true, lengthPrefixed = false, lenient = false)) + } + val lengthPrefixed = name.startsWith("p") + if (name.startsWith("p")) { + val cleanName = 
name.stripPrefix("p") + cache(cleanName) foreach(c => return TextCodecWithFlags(c, nullTerminated = false, lengthPrefixed = true, lenient = false)) + if (cleanName.endsWith("z")) { + val cleanName2 = cleanName.stripSuffix("z") + cache(cleanName2) foreach(c => return TextCodecWithFlags(c, nullTerminated = true, lengthPrefixed = true, lenient = false)) + } + } + log.error(s"Unknown string encoding: `$name`", position) + TextCodecWithFlags(TextCodec.Ascii, nullTerminated = false, lengthPrefixed = false, lenient = false) + } + +} +object TextCodecRepository { /* Patterns that classify the lines, keys and values of a .tbl encoding file, plus the Unicode codecs that a table can select with BUILTIN. */ + val LINE: Regex = "\\A\\s*([^=\\s]+)\\s*=\\s*(.+?)\\s*\\z".r + val COMMENT: Regex = "\\A\\s*(?:(?:;|#|//).*)?\\z".r + val DEPRECATED: Regex = "\\A\\s*!\\s*[Dd][Ee][Pp][Rr][Ee][Cc][Aa][Tt][Ee][Dd]?(.*)\\z".r + val SINGLEHEX: Regex = "\\A([0-9A-Fa-f]{2})\\z".r /* hex classes must not contain '-': matched text is passed to Integer.parseInt(_, 16), which rejects "1-" and turns "-1" into a negative table index */ + val HEXSTRING: Regex = "\\A([0-9A-Fa-f]+)\\z".r + val HEXRANGE: Regex = "\\A([0-9A-Fa-f]{2})-([0-9A-Fa-f]{2})\\z".r + val UNICODECODEPOINT: Regex = "\\A[Uu][-+]([0-9A-Fa-f]{1,5})\\z".r + val ESCAPE: Regex = "\\A\\{([\\w.'\\p{L}]+)}\\z".r + val CHAR: Regex = "\\A(\\S)\\z".r + + val Utf8 = new UnicodeTextCodec("UTF-8", StandardCharsets.UTF_8, List(0)) + + val Utf16Be = new UnicodeTextCodec("UTF-16BE", StandardCharsets.UTF_16BE, List(0, 0)) + + val Utf16Le = new UnicodeTextCodec("UTF-16LE", StandardCharsets.UTF_16LE, List(0, 0)) +} \ No newline at end of file diff --git a/src/test/scala/millfork/test/ZLineSizeSuite.scala b/src/test/scala/millfork/test/ZLineSizeSuite.scala index 3ccf2c09..18a6af5f 100644 --- a/src/test/scala/millfork/test/ZLineSizeSuite.scala +++ b/src/test/scala/millfork/test/ZLineSizeSuite.scala @@ -17,7 +17,7 @@ class ZLineSizeSuite extends FunSuite with Matchers { private def runCase(line: ZLine): Unit = { val platform = EmuPlatform.get(Cpu.Z80) val jobContext = JobContext(TestErrorReporting.log, new LabelGenerator) - val options = CompilationOptions(platform, Map(), None, 0, Map(), jobContext) + val options = 
CompilationOptions(platform, Map(), None, 0, Map(), EmuPlatform.textCodecRepository, jobContext) val env = new Environment(None, "", CpuFamily.I80, options) val correctSize = new Z80Assembler(null, env, platform).emitInstruction("default", options, 0x100, line) - 0x100 val guessedSize = line.sizeInBytes diff --git a/src/test/scala/millfork/test/auxilary/EncodingSanitySuite.scala b/src/test/scala/millfork/test/auxilary/EncodingSanitySuite.scala new file mode 100644 index 00000000..17e3d3c9 --- /dev/null +++ b/src/test/scala/millfork/test/auxilary/EncodingSanitySuite.scala @@ -0,0 +1,50 @@ +package millfork.test.auxilary + +import java.io.File + +import millfork.{CompilationOptions, Cpu, JobContext} +import millfork.compiler.LabelGenerator +import millfork.error.ConsoleLogger +import millfork.parser.{TableTextCodec, TextCodecRepository, TextCodecWithFlags} +import millfork.test.emu.EmuPlatform +import org.scalatest.{FunSuite, Matchers} + +/** + * @author Karol Stasiak + */ +class EncodingSanitySuite extends FunSuite with Matchers { + + + test("Encoding sanity test") { + val log = new ConsoleLogger() + val repo = new TextCodecRepository(List("include")) + val options = CompilationOptions(EmuPlatform.get(Cpu.Intel8080), Map(), None, 0, Map(), repo /* project-relative repository; an absolute path would only resolve on one machine */, JobContext(log, new LabelGenerator)) + + def roundtrip(codec: TableTextCodec, str1: String, str2: String): Unit = { /* fails the test unless both strings encode to the same bytes in the given codec */ + val l1 = codec.encode(log, None, str1.toCharArray.map(_.toInt).toList, options, lenient = false) + val l2 = codec.encode(log, None, str2.toCharArray.map(_.toInt).toList, options, lenient = false) + if (l1 != l2) { + fail(s"Strings $str1 and $str2 encoded to $l1 and $l2 in encoding ${codec.name}") + } + } + + for (encoding <- new File("include/encoding").list()) { + repo.forName(encoding.stripSuffix(".tbl"), None, log).codec match { + case codec:TableTextCodec => + codec.escapeSequences.foreach { + case ("copy", _) => roundtrip(codec, "{copy}", 
"©") + case ("ss", _) => roundtrip(codec, "{ss}", "ß") + case ("pi", _) => roundtrip(codec, "{pi}", "π") + case ("yen", _) => roundtrip(codec, "{yen}", "¥") + case ("pound", _) => roundtrip(codec, "{pound}", "£") + case ("cent", _) => roundtrip(codec, "{cent}", "¢") + case _ => + } + case _ => + + } + } + } + + +} diff --git a/src/test/scala/millfork/test/emu/EmuI86Run.scala b/src/test/scala/millfork/test/emu/EmuI86Run.scala index f2160f2b..6f75a909 100644 --- a/src/test/scala/millfork/test/emu/EmuI86Run.scala +++ b/src/test/scala/millfork/test/emu/EmuI86Run.scala @@ -33,7 +33,7 @@ object EmuI86Run { val source = Files.readAllLines(Paths.get(filename), StandardCharsets.US_ASCII).asScala.mkString("\n") val options = CompilationOptions(EmuPlatform.get(millfork.Cpu.Intel8086), Map( CompilationFlag.LenientTextEncoding -> true - ), None, 0, Map(), JobContext(TestErrorReporting.log, new LabelGenerator)) + ), None, 0, Map(), EmuPlatform.textCodecRepository, JobContext(TestErrorReporting.log, new LabelGenerator)) val PreprocessingResult(preprocessedSource, features, _) = Preprocessor.preprocessForTest(options, source) TestErrorReporting.log.debug(s"Features: $features") TestErrorReporting.log.info(s"Parsing $filename") @@ -88,7 +88,7 @@ class EmuI86Run(nodeOptimizations: List[NodeOptimization], assemblyOptimizations CompilationFlag.OptimizeForSize -> this.optimizeForSize, CompilationFlag.SubroutineExtraction -> optimizeForSize, CompilationFlag.LenientTextEncoding -> true) - val options = CompilationOptions(platform, millfork.Cpu.defaultFlags(millfork.Cpu.Intel8086).map(_ -> true).toMap ++ extraFlags, None, 0, Map(), JobContext(log, new LabelGenerator)) + val options = CompilationOptions(platform, millfork.Cpu.defaultFlags(millfork.Cpu.Intel8086).map(_ -> true).toMap ++ extraFlags, None, 0, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 var effectiveSource = source diff --git 
a/src/test/scala/millfork/test/emu/EmuM6809Run.scala b/src/test/scala/millfork/test/emu/EmuM6809Run.scala index 0a8f00b8..7a19934d 100644 --- a/src/test/scala/millfork/test/emu/EmuM6809Run.scala +++ b/src/test/scala/millfork/test/emu/EmuM6809Run.scala @@ -32,7 +32,7 @@ object EmuM6809Run { val source = Files.readAllLines(Paths.get(filename), StandardCharsets.US_ASCII).asScala.mkString("\n") val options = CompilationOptions(EmuPlatform.get(cpu), Map( CompilationFlag.LenientTextEncoding -> true - ), None, 0, Map(), JobContext(TestErrorReporting.log, new LabelGenerator)) + ), None, 0, Map(), EmuPlatform.textCodecRepository, JobContext(TestErrorReporting.log, new LabelGenerator)) val PreprocessingResult(preprocessedSource, features, _) = Preprocessor.preprocessForTest(options, source) TestErrorReporting.log.debug(s"Features: $features") TestErrorReporting.log.info(s"Parsing $filename") @@ -96,7 +96,7 @@ class EmuM6809Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizat CompilationFlag.OptimizeForSpeed -> blastProcessing, CompilationFlag.OptimizeForSonicSpeed -> blastProcessing // CompilationFlag.CheckIndexOutOfBounds -> true, - ), None, 0, Map(), JobContext(log, new LabelGenerator)) + ), None, 0, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 var effectiveSource = source diff --git a/src/test/scala/millfork/test/emu/EmuPlatform.scala b/src/test/scala/millfork/test/emu/EmuPlatform.scala index 29b940c1..9742d86b 100644 --- a/src/test/scala/millfork/test/emu/EmuPlatform.scala +++ b/src/test/scala/millfork/test/emu/EmuPlatform.scala @@ -1,7 +1,7 @@ package millfork.test.emu import millfork.output.{AfterCodeByteAllocator, CurrentBankFragmentOutput, UpwardByteAllocator, VariableAllocator} -import millfork.parser.TextCodec +import millfork.parser.{TextCodec, TextCodecRepository} import millfork.{Cpu, CpuFamily, OutputStyle, Platform, ViceDebugOutputFormat} /** @@ -10,6 +10,8 @@ import 
millfork.{Cpu, CpuFamily, OutputStyle, Platform, ViceDebugOutputFormat} object EmuPlatform { private val pointers: List[Int] = (0 until 256).toList + val textCodecRepository = new TextCodecRepository(List("include")) + def get(cpu: Cpu.Value) = new Platform( cpu, Map(), diff --git a/src/test/scala/millfork/test/emu/EmuRun.scala b/src/test/scala/millfork/test/emu/EmuRun.scala index 4105a9d9..ae331fa9 100644 --- a/src/test/scala/millfork/test/emu/EmuRun.scala +++ b/src/test/scala/millfork/test/emu/EmuRun.scala @@ -35,7 +35,7 @@ object EmuRun { val source = Files.readAllLines(Paths.get(filename), StandardCharsets.US_ASCII).asScala.mkString("\n") val options = CompilationOptions(EmuPlatform.get(millfork.Cpu.Mos), Map( CompilationFlag.LenientTextEncoding -> true - ), None, 4, Map(), JobContext(TestErrorReporting.log, new LabelGenerator)) + ), None, 4, Map(), EmuPlatform.textCodecRepository, JobContext(TestErrorReporting.log, new LabelGenerator)) val PreprocessingResult(preprocessedSource, features, _) = Preprocessor.preprocessForTest(options, source) TestErrorReporting.log.info(s"Parsing $filename") val parser = MosParser("", preprocessedSource, "", options, features) @@ -51,8 +51,8 @@ object EmuRun { } } - private lazy val cachedZpregO: Option[Program]= preload("include/zp_reg.mfk") - private lazy val cachedBcdO: Option[Program] = preload("include/bcd_6502.mfk") + private lazy val cachedZpregO: Option[Program]= preload("include/m6502/zp_reg.mfk") + private lazy val cachedBcdO: Option[Program] = preload("include/m6502/bcd_6502.mfk") private lazy val cachedStdioO: Option[Program] = preload("src/test/resources/include/dummy_stdio.mfk") def cachedZpreg: Program = synchronized { cachedZpregO.getOrElse(throw new IllegalStateException()) } def cachedStdio: Program = synchronized { cachedStdioO.getOrElse(throw new IllegalStateException()) } @@ -161,7 +161,7 @@ class EmuRun(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimization], CompilationFlag.OptimizeForSpeed -> 
blastProcessing, CompilationFlag.OptimizeForSonicSpeed -> blastProcessing // CompilationFlag.CheckIndexOutOfBounds -> true, - ), None, 4, Map(), JobContext(log, new LabelGenerator)) + ), None, 4, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 if (native16 && platform.cpu != millfork.Cpu.Sixteen) throw new IllegalStateException diff --git a/src/test/scala/millfork/test/emu/EmuZ80Run.scala b/src/test/scala/millfork/test/emu/EmuZ80Run.scala index 63ce1595..86da93e8 100644 --- a/src/test/scala/millfork/test/emu/EmuZ80Run.scala +++ b/src/test/scala/millfork/test/emu/EmuZ80Run.scala @@ -35,7 +35,7 @@ object EmuZ80Run { val source = Files.readAllLines(Paths.get(filename), StandardCharsets.US_ASCII).asScala.mkString("\n") val options = CompilationOptions(EmuPlatform.get(cpu), Map( CompilationFlag.LenientTextEncoding -> true - ), None, 0, Map(), JobContext(TestErrorReporting.log, new LabelGenerator)) + ), None, 0, Map(), EmuPlatform.textCodecRepository, JobContext(TestErrorReporting.log, new LabelGenerator)) val PreprocessingResult(preprocessedSource, features, _) = Preprocessor.preprocessForTest(options, source) TestErrorReporting.log.debug(s"Features: $features") TestErrorReporting.log.info(s"Parsing $filename") @@ -87,7 +87,7 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio CompilationFlag.EmitIllegals -> (cpu == millfork.Cpu.Z80 || cpu == millfork.Cpu.Intel8085 || cpu == millfork.Cpu.Z80Next), CompilationFlag.EmitZ80NextOpcodes -> (cpu == millfork.Cpu.Z80Next), CompilationFlag.LenientTextEncoding -> true) - val options = CompilationOptions(platform, millfork.Cpu.defaultFlags(cpu).map(_ -> true).toMap ++ extraFlags, None, 0, Map(), JobContext(log, new LabelGenerator)) + val options = CompilationOptions(platform, millfork.Cpu.defaultFlags(cpu).map(_ -> true).toMap ++ extraFlags, None, 0, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) 
println(cpu) println(options.flags.filter(_._2).keys.toSeq.sorted) log.hasErrors = false diff --git a/src/test/scala/millfork/test/emu/ShouldNotCompile.scala b/src/test/scala/millfork/test/emu/ShouldNotCompile.scala index 1a74b75f..6d0f3257 100644 --- a/src/test/scala/millfork/test/emu/ShouldNotCompile.scala +++ b/src/test/scala/millfork/test/emu/ShouldNotCompile.scala @@ -32,7 +32,7 @@ object ShouldNotCompile extends Matchers { val log = TestErrorReporting.log println(source) val platform = EmuPlatform.get(cpu) - val options = CompilationOptions(platform, Map(CompilationFlag.LenientTextEncoding -> true), None, platform.zpRegisterSize, Map(), JobContext(log, new LabelGenerator)) + val options = CompilationOptions(platform, Map(CompilationFlag.LenientTextEncoding -> true), None, platform.zpRegisterSize, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 var effectiveSource = source diff --git a/src/test/scala/millfork/test/emu/ShouldNotParse.scala b/src/test/scala/millfork/test/emu/ShouldNotParse.scala index d34d8da6..2b25ee01 100644 --- a/src/test/scala/millfork/test/emu/ShouldNotParse.scala +++ b/src/test/scala/millfork/test/emu/ShouldNotParse.scala @@ -22,7 +22,7 @@ object ShouldNotParse extends Matchers { val log = TestErrorReporting.log println(source) val platform = EmuPlatform.get(cpu) - val options = CompilationOptions(platform, Map(CompilationFlag.LenientTextEncoding -> true), None, platform.zpRegisterSize, Map(), JobContext(log, new LabelGenerator)) + val options = CompilationOptions(platform, Map(CompilationFlag.LenientTextEncoding -> true), None, platform.zpRegisterSize, Map(), EmuPlatform.textCodecRepository, JobContext(log, new LabelGenerator)) log.hasErrors = false log.verbosity = 999 var effectiveSource = source