From 560d81043e78df65d3b53343cdaecb4526b239e8 Mon Sep 17 00:00:00 2001 From: jespergravgaard Date: Sat, 8 Aug 2020 02:09:08 +0200 Subject: [PATCH] Added support for Atari string encodings ATASCII and screencode_atari. Closes #500 --- .../dk/camelot64/kickc/asm/AsmProgram.java | 10 ++- .../camelot64/kickc/asm/AsmSetEncoding.java | 2 +- .../model/values/CharToAtasciiConverter.java | 35 ++++++++++ .../kickc/model/values/StringEncoding.java | 34 +++++++--- src/test/kc/examples/atarixl/helloxl.c | 7 +- src/test/kc/string-encoding-unknown.c | 2 +- src/test/ref/examples/atarixl/helloxl.asm | 5 +- src/test/ref/examples/atarixl/helloxl.log | 68 +++---------------- src/test/ref/examples/atarixl/helloxl.sym | 2 +- 9 files changed, 87 insertions(+), 78 deletions(-) create mode 100644 src/main/java/dk/camelot64/kickc/model/values/CharToAtasciiConverter.java diff --git a/src/main/java/dk/camelot64/kickc/asm/AsmProgram.java b/src/main/java/dk/camelot64/kickc/asm/AsmProgram.java index deb12931a..da9353b9f 100644 --- a/src/main/java/dk/camelot64/kickc/asm/AsmProgram.java +++ b/src/main/java/dk/camelot64/kickc/asm/AsmProgram.java @@ -70,8 +70,6 @@ public class AsmProgram { public void addLine(AsmLine line) { line.setIndex(nextLineIndex++); getCurrentChunk().addLine(line); - if(line instanceof AsmSetEncoding) - currentEncoding = ((AsmSetEncoding) line).getEncoding(); } /** @@ -90,7 +88,13 @@ public class AsmProgram { // Size is 1 - grab it! 
StringEncoding encoding = encodings.iterator().next(); if(!getCurrentEncoding().equals(encoding)) { - addLine(new AsmSetEncoding(encoding)); + if(encoding.asmEncoding != null) { + addLine(new AsmSetEncoding(encoding)); + } else { + addLine(new AsmSetEncoding(StringEncoding.ASCII)); + } + currentEncoding = encoding; + } } diff --git a/src/main/java/dk/camelot64/kickc/asm/AsmSetEncoding.java b/src/main/java/dk/camelot64/kickc/asm/AsmSetEncoding.java index 98912771d..a889c36b4 100644 --- a/src/main/java/dk/camelot64/kickc/asm/AsmSetEncoding.java +++ b/src/main/java/dk/camelot64/kickc/asm/AsmSetEncoding.java @@ -24,7 +24,7 @@ public class AsmSetEncoding implements AsmLine { @Override public String getAsm() { - return ".encoding \"" + encoding.name+ "\""; + return ".encoding \"" + encoding.asmEncoding+ "\""; } @Override diff --git a/src/main/java/dk/camelot64/kickc/model/values/CharToAtasciiConverter.java b/src/main/java/dk/camelot64/kickc/model/values/CharToAtasciiConverter.java new file mode 100644 index 000000000..e0756a383 --- /dev/null +++ b/src/main/java/dk/camelot64/kickc/model/values/CharToAtasciiConverter.java @@ -0,0 +1,35 @@ +package dk.camelot64.kickc.model.values; + +import kickass.nonasm.c64.CharToPetsciiConverter; + +import java.util.HashMap; +import java.util.Map; + +/** + * Supports the ATARI charset ATASCII and the ATARI screencodes + */ +public class CharToAtasciiConverter { + + /** Map from UNICODE char to Byte value for ATASCII. https://www.atariarchives.org/mapping/appendix10.php */ + public static Map charToAtascii; + + /** Map from UNICODE char to Byte value for Atari Screencode. 
https://www.atariarchives.org/mapping/appendix10.php */ + public static Map charToScreenCodeAtari; + + static { + charToAtascii = CharToPetsciiConverter.charToAscii; + charToScreenCodeAtari = new HashMap<>(); + for(Character atasciiChar : charToAtascii.keySet()) { + final Byte atasciiByte = charToAtascii.get(atasciiChar); + Byte screencodeAtariByte; + if(atasciiByte >= 0 && atasciiByte <= 31) + screencodeAtariByte = (byte) (atasciiByte + 64); + else if(atasciiByte >= 32 && atasciiByte <= 96) + screencodeAtariByte = (byte) (atasciiByte - 32); + else + screencodeAtariByte = atasciiByte; + charToScreenCodeAtari.put(atasciiChar, screencodeAtariByte); + } + } + +} diff --git a/src/main/java/dk/camelot64/kickc/model/values/StringEncoding.java b/src/main/java/dk/camelot64/kickc/model/values/StringEncoding.java index ba6feee3c..ea3a6a99e 100644 --- a/src/main/java/dk/camelot64/kickc/model/values/StringEncoding.java +++ b/src/main/java/dk/camelot64/kickc/model/values/StringEncoding.java @@ -9,11 +9,14 @@ import java.util.PrimitiveIterator; /** String encoding. 
*/ public enum StringEncoding { - PETSCII_MIXED("petscii_mixed", "pm", CharToPetsciiConverter.charToPetscii_mixed), - PETSCII_UPPER("petscii_upper", "pu", CharToPetsciiConverter.charToPetscii_mixed), - SCREENCODE_MIXED("screencode_mixed", "sm", CharToPetsciiConverter.charToScreenCode_mixed), - SCREENCODE_UPPER("screencode_upper", "su", CharToPetsciiConverter.charToScreenCode_upper); - + PETSCII_MIXED("petscii_mixed", "petscii_mixed", "pm", CharToPetsciiConverter.charToPetscii_mixed), + PETSCII_UPPER("petscii_upper", "petscii_upper", "pu", CharToPetsciiConverter.charToPetscii_mixed), + SCREENCODE_MIXED("screencode_mixed", "screencode_mixed", "sm", CharToPetsciiConverter.charToScreenCode_mixed), + SCREENCODE_UPPER("screencode_upper", "screencode_upper", "su", CharToPetsciiConverter.charToScreenCode_upper), + ASCII("ascii", "ascii", "as", CharToPetsciiConverter.charToAscii), + ATASCII("atascii", "ascii", "at", CharToAtasciiConverter.charToAtascii), + SCREENCODE_ATARI("screencode_atari", null, "sa", CharToAtasciiConverter.charToScreenCodeAtari), + ; /** Char value used to encode \xnn chars without a value within the chosen encoding. A char C is encoded as CHAR_SPECIAL_VAL+C */ public static final char CHAR_SPECIAL_VAL = 64000; @@ -25,14 +28,18 @@ public enum StringEncoding { /** The encoding name. */ public final String name; + /** The KickAsm Encoding name. Null if KickAsm does not support the encoding. */ + public final String asmEncoding; + /** The string suffix usable for selecting the encoding. */ public final String suffix; - /** The mapping from character value to integer value for the encoding. */ + /** The mapping from character value to integer (byte) value for the encoding. 
*/ public final Map mapping; - StringEncoding(String name, String suffix, Map mapping) { + StringEncoding(String name, String asmEncoding, String suffix, Map mapping) { this.name = name; + this.asmEncoding = asmEncoding; this.suffix = suffix; this.mapping = mapping; } @@ -163,8 +170,7 @@ public enum StringEncoding { hexNum += (char) escapedCharsIterator.nextInt(); hexNum += (char) escapedCharsIterator.nextInt(); final byte hexEncoding = (byte) Integer.parseInt(hexNum, 16); - final Character aChar = charFromEncoded(hexEncoding); - return aChar; + return charFromEncoded(hexEncoding); default: throw new CompileError("Illegal string escape sequence \\" + escapeChar); } @@ -178,6 +184,14 @@ public enum StringEncoding { * @return The char itself - or the appropriate escape sequence if needed. */ public String asciiToEscapedEncoded(char aChar, boolean escapeSingleQuotes) { + if(this.asmEncoding == null) { + // Encoding not supported by KickAsm - convert to ASCII / use escapes + final byte encoded = encodedFromChar(aChar); + if(encoded != ASCII.encodedFromChar(aChar)) + // Not the same as in ASCII - use escape + return String.format("\\$%02x", encoded); + } + switch(aChar) { case '\n': return "\\n"; @@ -198,7 +212,7 @@ public enum StringEncoding { if(aChar > 127) { // Encode all large chars - including SPECIAL's final byte encoded = encodedFromChar(aChar); - return String.format("\\$%x", encoded); + return String.format("\\$%02x", encoded); } else return Character.toString(aChar); } diff --git a/src/test/kc/examples/atarixl/helloxl.c b/src/test/kc/examples/atarixl/helloxl.c index 5985e1e8f..0fda58bd7 100644 --- a/src/test/kc/examples/atarixl/helloxl.c +++ b/src/test/kc/examples/atarixl/helloxl.c @@ -4,6 +4,8 @@ // Display Lists atariarchives.org/mapping/appendix8.php #pragma target(atarixl) +#pragma encoding(screencode_atari) +#pragma emulator("65XEDebugger") #include void main() { @@ -16,14 +18,13 @@ void main() { } // Message to show -// Encoding: atari_screencode -char 
TEXT[] = {'h'|0x20,'e'|0x20,'l'|0x20,'l'|0x20,'o'|0x20,0x0,'x'|0x60,'t'|0x60,0x0,'w'|0x20,'o'|0x20,'r'|0x20,'l'|0x20,'d'|0x20,0x41,0x0,0x0,0x0,0x0}; +char TEXT[] = "hello XT world! "; // ANTIC Display List Program // https://en.wikipedia.org/wiki/ANTIC char DISPLAY_LIST[] = { 0x70, 0x70, 0x70, // 3* BLK 8 (0x70) 8 blank lines - 0x47, TEXT, // LMS 7, TEXT (0x47) Load memory address and set to charmode 7 (16/20/24 chars wide, 16 lines per char) + 0x47, TEXT, // LMS 7, TEXT (0x47) Load memory address and set to charmode 7 (16/20/24 chars wide, 16 lines per char) 0x41, DISPLAY_LIST // JVB DISPLAY_LIST (0x41) jump and wait for VBLANK }; diff --git a/src/test/kc/string-encoding-unknown.c b/src/test/kc/string-encoding-unknown.c index 92b58f1d6..88670e729 100644 --- a/src/test/kc/string-encoding-unknown.c +++ b/src/test/kc/string-encoding-unknown.c @@ -1,6 +1,6 @@ // Test setting an unknown encoding using the #encoding pragma -#pragma encoding(ascii) +#pragma encoding(ebcdic) void main() { } diff --git a/src/test/ref/examples/atarixl/helloxl.asm b/src/test/ref/examples/atarixl/helloxl.asm index 7716947b9..e8d13aeb8 100644 --- a/src/test/ref/examples/atarixl/helloxl.asm +++ b/src/test/ref/examples/atarixl/helloxl.asm @@ -47,8 +47,9 @@ main: { } .segment Data // Message to show - // Encoding: atari_screencode - TEXT: .byte 'h'|$20, 'e'|$20, 'l'|$20, 'l'|$20, 'o'|$20, 0, 'x'|$60, 't'|$60, 0, 'w'|$20, 'o'|$20, 'r'|$20, 'l'|$20, 'd'|$20, $41, 0, 0, 0, 0 +.encoding "ascii" + TEXT: .text @"hello\$00\$38\$34\$00world\$01\$00" + .byte 0 // ANTIC Display List Program // https://en.wikipedia.org/wiki/ANTIC DISPLAY_LIST: .byte $70, $70, $70, $47, TEXT, $41, DISPLAY_LIST diff --git a/src/test/ref/examples/atarixl/helloxl.log b/src/test/ref/examples/atarixl/helloxl.log index 02b92a3c5..70bd346d6 100644 --- a/src/test/ref/examples/atarixl/helloxl.log +++ b/src/test/ref/examples/atarixl/helloxl.log @@ -141,7 +141,7 @@ SYMBOL TABLE SSA (const byte*) DISPLAY_LIST[] = { (byte) $70, (byte) 
$70, (byte) $70, (byte) $47, <(const byte*) TEXT, >(const byte*) TEXT, (byte) $41, <(const byte*) DISPLAY_LIST, >(const byte*) DISPLAY_LIST } (const nomodify byte**) SDLST = (byte**)(number) $230 (const nomodify byte*) SDMCTL = (byte*)(number) $22f -(const byte*) TEXT[] = { (byte)(byte) 'h'|(number) $20, (byte)(byte) 'e'|(number) $20, (byte)(byte) 'l'|(number) $20, (byte)(byte) 'l'|(number) $20, (byte)(byte) 'o'|(number) $20, (byte) 0, (byte)(byte) 'x'|(number) $60, (byte)(byte) 't'|(number) $60, (byte) 0, (byte)(byte) 'w'|(number) $20, (byte)(byte) 'o'|(number) $20, (byte)(byte) 'r'|(number) $20, (byte)(byte) 'l'|(number) $20, (byte)(byte) 'd'|(number) $20, (byte) $41, (byte) 0, (byte) 0, (byte) 0, (byte) 0 } +(const byte*) TEXT[] = (byte*) "hello XT world! "sa (void()) __start() (label) __start::@1 (label) __start::@2 @@ -152,50 +152,14 @@ SYMBOL TABLE SSA (label) main::@1 (label) main::@return -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $60 in -Adding number conversion cast (unumber) $60 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in -Adding number conversion cast (unumber) $20 in Adding number conversion cast (unumber) $21 in *((const nomodify byte*) SDMCTL) ← (number) $21 Successful SSA optimization PassNAddNumberTypeConversions Inlining cast *((const nomodify byte*) SDMCTL) ← (unumber)(number) $21 Successful SSA optimization Pass2InlineCast Simplifying constant pointer cast (byte*) 559 Simplifying constant pointer cast (byte**) 560 -Simplifying constant integer cast $20 -Simplifying constant integer cast $20 -Simplifying constant integer cast $20 -Simplifying constant integer cast 
$20 -Simplifying constant integer cast $20 -Simplifying constant integer cast $60 -Simplifying constant integer cast $60 -Simplifying constant integer cast $20 -Simplifying constant integer cast $20 -Simplifying constant integer cast $20 -Simplifying constant integer cast $20 -Simplifying constant integer cast $20 Simplifying constant integer cast $21 Successful SSA optimization PassNCastSimplification -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $60 -Finalized unsigned number type (byte) $60 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 -Finalized unsigned number type (byte) $20 Finalized unsigned number type (byte) $21 Successful SSA optimization PassNFinalizeNumberTypeConversions Simple Condition (bool~) main::$0 [3] if((number) 0!=(number) 1) goto main::@1 @@ -211,19 +175,6 @@ Removing unused procedure block __start::@1 Removing unused procedure block __start::@2 Removing unused procedure block __start::@return Successful SSA optimization PassNEliminateEmptyStart -Simplifying constant integer cast (byte) 'h'|(byte) $20 -Simplifying constant integer cast (byte) 'e'|(byte) $20 -Simplifying constant integer cast (byte) 'l'|(byte) $20 -Simplifying constant integer cast (byte) 'l'|(byte) $20 -Simplifying constant integer cast (byte) 'o'|(byte) $20 -Simplifying constant integer cast (byte) 'x'|(byte) $60 -Simplifying constant integer cast (byte) 't'|(byte) $60 -Simplifying constant integer cast (byte) 'w'|(byte) $20 -Simplifying constant integer cast (byte) 'o'|(byte) $20 -Simplifying constant integer cast (byte) 'r'|(byte) $20 -Simplifying constant integer cast (byte) 'l'|(byte) $20 -Simplifying constant integer cast (byte) 'd'|(byte) 
$20 -Successful SSA optimization PassNCastSimplification Adding NOP phi() at start of main::@1 CALL GRAPH @@ -409,8 +360,9 @@ main: { // File Data .segment Data // Message to show - // Encoding: atari_screencode - TEXT: .byte 'h'|$20, 'e'|$20, 'l'|$20, 'l'|$20, 'o'|$20, 0, 'x'|$60, 't'|$60, 0, 'w'|$20, 'o'|$20, 'r'|$20, 'l'|$20, 'd'|$20, $41, 0, 0, 0, 0 +.encoding "ascii" + TEXT: .text @"hello\$00\$38\$34\$00world\$01\$00" + .byte 0 // ANTIC Display List Program // https://en.wikipedia.org/wiki/ANTIC DISPLAY_LIST: .byte $70, $70, $70, $47, TEXT, $41, DISPLAY_LIST @@ -496,8 +448,9 @@ main: { // File Data .segment Data // Message to show - // Encoding: atari_screencode - TEXT: .byte 'h'|$20, 'e'|$20, 'l'|$20, 'l'|$20, 'o'|$20, 0, 'x'|$60, 't'|$60, 0, 'w'|$20, 'o'|$20, 'r'|$20, 'l'|$20, 'd'|$20, $41, 0, 0, 0, 0 +.encoding "ascii" + TEXT: .text @"hello\$00\$38\$34\$00world\$01\$00" + .byte 0 // ANTIC Display List Program // https://en.wikipedia.org/wiki/ANTIC DISPLAY_LIST: .byte $70, $70, $70, $47, TEXT, $41, DISPLAY_LIST @@ -617,7 +570,7 @@ FINAL SYMBOL TABLE (const byte*) DISPLAY_LIST[] = { (byte) $70, (byte) $70, (byte) $70, (byte) $47, <(const byte*) TEXT, >(const byte*) TEXT, (byte) $41, <(const byte*) DISPLAY_LIST, >(const byte*) DISPLAY_LIST } (const nomodify byte**) SDLST = (byte**) 560 (const nomodify byte*) SDMCTL = (byte*) 559 -(const byte*) TEXT[] = { (byte) 'h'|(byte) $20, (byte) 'e'|(byte) $20, (byte) 'l'|(byte) $20, (byte) 'l'|(byte) $20, (byte) 'o'|(byte) $20, (byte) 0, (byte) 'x'|(byte) $60, (byte) 't'|(byte) $60, (byte) 0, (byte) 'w'|(byte) $20, (byte) 'o'|(byte) $20, (byte) 'r'|(byte) $20, (byte) 'l'|(byte) $20, (byte) 'd'|(byte) $20, (byte) $41, (byte) 0, (byte) 0, (byte) 0, (byte) 0 } +(const byte*) TEXT[] = (byte*) "hello XT world! 
"sa (void()) main() (label) main::@1 @@ -684,8 +637,9 @@ main: { // File Data .segment Data // Message to show - // Encoding: atari_screencode - TEXT: .byte 'h'|$20, 'e'|$20, 'l'|$20, 'l'|$20, 'o'|$20, 0, 'x'|$60, 't'|$60, 0, 'w'|$20, 'o'|$20, 'r'|$20, 'l'|$20, 'd'|$20, $41, 0, 0, 0, 0 +.encoding "ascii" + TEXT: .text @"hello\$00\$38\$34\$00world\$01\$00" + .byte 0 // ANTIC Display List Program // https://en.wikipedia.org/wiki/ANTIC DISPLAY_LIST: .byte $70, $70, $70, $47, TEXT, $41, DISPLAY_LIST diff --git a/src/test/ref/examples/atarixl/helloxl.sym b/src/test/ref/examples/atarixl/helloxl.sym index a0cead02d..75efabc1e 100644 --- a/src/test/ref/examples/atarixl/helloxl.sym +++ b/src/test/ref/examples/atarixl/helloxl.sym @@ -102,7 +102,7 @@ (const byte*) DISPLAY_LIST[] = { (byte) $70, (byte) $70, (byte) $70, (byte) $47, <(const byte*) TEXT, >(const byte*) TEXT, (byte) $41, <(const byte*) DISPLAY_LIST, >(const byte*) DISPLAY_LIST } (const nomodify byte**) SDLST = (byte**) 560 (const nomodify byte*) SDMCTL = (byte*) 559 -(const byte*) TEXT[] = { (byte) 'h'|(byte) $20, (byte) 'e'|(byte) $20, (byte) 'l'|(byte) $20, (byte) 'l'|(byte) $20, (byte) 'o'|(byte) $20, (byte) 0, (byte) 'x'|(byte) $60, (byte) 't'|(byte) $60, (byte) 0, (byte) 'w'|(byte) $20, (byte) 'o'|(byte) $20, (byte) 'r'|(byte) $20, (byte) 'l'|(byte) $20, (byte) 'd'|(byte) $20, (byte) $41, (byte) 0, (byte) 0, (byte) 0, (byte) 0 } +(const byte*) TEXT[] = (byte*) "hello XT world! "sa (void()) main() (label) main::@1