mirror of
https://github.com/fadden/6502bench.git
synced 2025-01-22 12:33:56 +00:00
38d3adbb08
I didn't think it made sense, but I found something that used it, so apparently it's a thing. This updates the operand editor to let you choose PETSCII+DCI, and updates the assemblers to handle it correctly (really just 64tass, since the others either don't have a DCI directive or don't deal with PETSCII at all). Changed the char-encoding sample from "bad dcI" to "pet dcI", and updated the documentation.
296 lines
12 KiB
C#
296 lines
12 KiB
C#
/*
|
|
* Copyright 2019 faddenSoft
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
using System;
|
|
using System.Diagnostics;
|
|
|
|
namespace Asm65 {
|
|
/// <summary>
/// Character encoding helper methods.
/// </summary>
/// <remarks>
/// Each supported encoding provides three kinds of method: an "IsPrintable" test that
/// accepts only bytes that map to a printable ASCII glyph, an "IsExtended" test that also
/// accepts selected control codes that commonly appear inside strings, and a "Convert"
/// method that maps a byte to a char (or UNPRINTABLE_CHAR).  The methods match the
/// InclusionTest and Convert delegate signatures.
/// </remarks>
public static class CharEncoding {
    public const char UNPRINTABLE_CHAR = '\ufffd';  // Unicode REPLACEMENT CHARACTER

    /// <summary>
    /// Determines whether the byte represents a member of the character set.  The
    /// specifics (e.g. printable only) are defined by the method.
    /// </summary>
    public delegate bool InclusionTest(byte val);

    /// <summary>
    /// Converts the byte to a printable character.  Returns UNPRINTABLE_CHAR if the value
    /// does not map to something printable.
    /// </summary>
    /// <remarks>
    /// Yes, I'm assuming it all fits in a UTF-16 char.  PETSCII has some glyphs that
    /// aren't part of the BMP, but we're targeting a variety of cross-assemblers with
    /// potentially different notions of Unicode mappings, so anything non-ASCII is
    /// getting hexified anyway.
    /// </remarks>
    public delegate char Convert(byte val);

    /// <summary>
    /// Character encoding.
    /// </summary>
    public enum Encoding {
        Unknown = 0,
        Ascii,
        HighAscii,
        C64Petscii,
        C64ScreenCode,
    }

    //
    // Plain ASCII.
    //
    // We recognize BELL, LF, and CR as control characters that may be present in
    // text strings.  This allows us to generate:
    //
    //   .str "hello",$0d
    //
    // instead of:
    //
    //   .str "hello"
    //   .dd1 $0d
    //

    /// <summary>
    /// Returns true if the value is a printable ASCII character, i.e. in [$20,$7e].
    /// </summary>
    public static bool IsPrintableAscii(byte val) {
        return (val >= 0x20 && val < 0x7f);
    }

    /// <summary>
    /// Returns true if the value is printable ASCII, or is BELL ($07), LF ($0a),
    /// or CR ($0d).
    /// </summary>
    public static bool IsExtendedAscii(byte val) {
        return IsPrintableAscii(val) || val == 0x07 || val == 0x0a || val == 0x0d;
    }

    /// <summary>
    /// Converts a plain ASCII value to a char.
    /// </summary>
    /// <returns>The character, or UNPRINTABLE_CHAR if the value isn't printable
    ///   ASCII.</returns>
    public static char ConvertAscii(byte val) {
        if (IsPrintableAscii(val)) {
            return (char)val;
        } else {
            return UNPRINTABLE_CHAR;
        }
    }

    //
    // High ASCII: plain ASCII with the high bit set.
    //

    /// <summary>
    /// Returns true if the value is a printable ASCII character with the high bit set,
    /// i.e. in [$a0,$fe].
    /// </summary>
    public static bool IsPrintableHighAscii(byte val) {
        return (val >= 0xa0 && val < 0xff);
    }

    /// <summary>
    /// Returns true if the value is printable high ASCII, or is BELL, LF, or CR with
    /// the high bit set ($87, $8a, $8d).
    /// </summary>
    public static bool IsExtendedHighAscii(byte val) {
        return IsPrintableHighAscii(val) || val == 0x87 || val == 0x8a || val == 0x8d;
    }

    /// <summary>
    /// Converts a high ASCII value to a char by stripping the high bit.
    /// </summary>
    /// <returns>The character, or UNPRINTABLE_CHAR if the value isn't printable
    ///   high ASCII.  Values without the high bit set are not accepted.</returns>
    public static char ConvertHighAscii(byte val) {
        if (IsPrintableHighAscii(val)) {
            return (char)(val & 0x7f);
        } else {
            return UNPRINTABLE_CHAR;
        }
    }

    //
    // High and/or low ASCII.
    //

    /// <summary>
    /// Returns true if the value, ignoring the high bit, is printable ASCII.
    /// </summary>
    public static bool IsPrintableLowOrHighAscii(byte val) {
        return IsPrintableAscii((byte)(val & 0x7f));
    }

    /// <summary>
    /// Returns true if the value, ignoring the high bit, is printable ASCII or one of
    /// the recognized control characters (BELL, LF, CR).
    /// </summary>
    public static bool IsExtendedLowOrHighAscii(byte val) {
        return IsExtendedAscii((byte)(val & 0x7f));
    }

    /// <summary>
    /// Converts a low or high ASCII value to a char.  The high bit is ignored.
    /// </summary>
    /// <returns>The character, or UNPRINTABLE_CHAR if the low seven bits don't form
    ///   a printable ASCII value.</returns>
    public static char ConvertLowAndHighAscii(byte val) {
        return ConvertAscii((byte)(val & 0x7f));
    }

    //
    // ATASCII (Atari 400/800)
    //
    // Substantially similar to ASCII, but with printable symbols in the control character
    // range ($00-1f).  Characters $60 and $7b-7f don't correspond to ASCII symbols.
    //
    // Characters with the high bit set are shown with colors reversed.
    //
    // (No ATASCII methods are defined here.)
    //

    //
    // PETSCII (C64 variant)
    //
    // Assemblers like ACME use the C64 character set 2, a/k/a shifted mode, lower case
    // mode, or text mode.
    //
    // Comparison to ASCII:
    //  $00-1f: control codes, many with C64-specific meanings
    //  $20-3f: same as ASCII
    //  $40-5f: lower case letters (rather than upper case); backslash, caret, and underscore
    //    replaced with non-ASCII symbols (though the up-arrow in place of caret is close)
    //  $60-7f: upper case letters (rather than lower case); backquote, curly braces,
    //    vertical bar, and tilde replaced with non-ASCII symbols
    //  $80-9f: more control codes
    //  $a0-bf: non-ASCII symbols
    //  $c0-df: clone of $60-7f; by convention this is used for upper case, since it's
    //    equal to lower case with the high bit set
    //  $e0-ff: non-ASCII symbols (mostly a clone of $a0-bf)
    //
    // The printable ASCII set (glyphs in [$20,$7e]) is [$20,$5b]+$5d+[$c1,$da].
    // (Looks like the Pet had $5c=backslash, but C64 went with a \u00a3 POUND SIGN instead.)
    // Anything outside that range will get printed as hex to ensure proper conversion.
    //
    // Note for the pedantic: in ASCII-1963, up-arrow and left-arrow characters were
    // assigned to the caret and underscore values.  So arguably those are "ASCII" as
    // well, unless you're sane and define ASCII more narrowly.
    //
    // Control codes that we might expect to appear in the middle of a string:
    //  $05 1c 1e 1f 81 90 95 96 97 98 99 9a 9b 9c 9e 9f - set text color
    //  $93 - clear
    //  $12 92 - reverse on/off
    //  $07 0a 0d - bell, LF, CR (note CR is favored for EOL)
    //
    // Other Commodore systems use variants on PETSCII, but the ASCII correspondence remains
    // the same -- only the non-ASCII symbols change.  (On the original PET, $60-7f was a
    // duplicate of $20-3f rather than a duplicate of the upper-case letters, which might be
    // why $c0-df is preferred for upper case.)
    //
    // For full details, see the chart at https://www.aivosto.com/articles/petscii.pdf
    //

    // NOTE: static initializers run in textual order.  The inclusion maps must be
    // declared before sPetsciiToUnicode, because the conversion table builder checks
    // itself against IsPrintableC64Petscii().
    private static readonly bool[] sPrintablePetscii = CreatePrintablePetsciiMap();
    private static readonly bool[] sExtendedPetscii = CreateExtendedPetsciiMap();

    /// <summary>
    /// Creates a 256-entry table that is true for every PETSCII value that maps to a
    /// printable ASCII glyph: [$20,$5b], $5d, and [$c1,$da].
    /// </summary>
    private static bool[] CreatePrintablePetsciiMap() {
        bool[] map = new bool[256];
        for (int i = 0x20; i <= 0x5b; i++) {
            map[i] = true;
        }
        map[0x5d] = true;
        for (int i = 0xc1; i <= 0xda; i++) {
            map[i] = true;
        }
        return map;
    }

    /// <summary>
    /// Creates a 256-entry table that is true for every printable PETSCII value, plus
    /// the control codes that we might expect to find in strings.
    /// </summary>
    private static bool[] CreateExtendedPetsciiMap() {
        bool[] map = CreatePrintablePetsciiMap();

        byte[] stringControlCodes = {
            // set text color
            0x05, 0x1c, 0x1e, 0x1f, 0x81, 0x90, 0x95, 0x96,
            0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9e, 0x9f,
            // clear, reverse on/off
            0x93, 0x12, 0x92,
            // bell, LF, CR
            0x07, 0x0a, 0x0d
        };
        foreach (byte code in stringControlCodes) {
            map[code] = true;
        }
        return map;
    }

    /// <summary>
    /// Returns true if the C64 PETSCII value maps to a printable ASCII glyph.
    /// </summary>
    public static bool IsPrintableC64Petscii(byte val) {
        return sPrintablePetscii[val];
    }

    /// <summary>
    /// Returns true if the C64 PETSCII value maps to a printable ASCII glyph, or is one
    /// of the control codes we expect to find in strings (color, clear, reverse,
    /// bell, LF, CR).
    /// </summary>
    public static bool IsExtendedC64Petscii(byte val) {
        return sExtendedPetscii[val];
    }

    private static readonly char[] sPetsciiToUnicode = CreatePetsciiToUnicodeMap();

    /// <summary>
    /// Creates a 256-entry table that maps each C64 PETSCII value to the equivalent
    /// ASCII character, or to UNPRINTABLE_CHAR if there isn't one.
    /// </summary>
    private static char[] CreatePetsciiToUnicodeMap() {
        // There are performance arguments for doing this with and without a table.  For
        // x64 with fast memory and large caches, table seems reasonable.
        char[] map = new char[256];
        for (int val = 0; val < 256; val++) {
            char ch;
            if ((val >= 0x20 && val <= 0x40) || val == 0x5b || val == 0x5d) {
                ch = (char)val;             // number/symbols, '[', ']'
            } else if (val >= 0x41 && val <= 0x5a) {
                ch = (char)(val + 0x20);    // lower case
            } else if (val >= 0xc1 && val <= 0xda) {
                ch = (char)(val - 0x80);    // upper case
            } else {
                // Confirm that the conversion table agrees with the inclusion test.
                Debug.Assert(!IsPrintableC64Petscii((byte)val));
                ch = UNPRINTABLE_CHAR;
            }
            map[val] = ch;
        }
        return map;
    }

    /// <summary>
    /// Converts a C64 PETSCII value to a char.
    /// </summary>
    /// <returns>The ASCII equivalent, or UNPRINTABLE_CHAR if the value doesn't map to a
    ///   printable ASCII glyph.</returns>
    public static char ConvertC64Petscii(byte val) {
        return sPetsciiToUnicode[val];
    }

    /// <summary>
    /// Converts a C64 PETSCII value to a char, ignoring the high bit.
    /// </summary>
    /// <returns>The ASCII equivalent of the low seven bits, or UNPRINTABLE_CHAR.</returns>
    public static char ConvertLowAndHighC64Petscii(byte val) {
        // This is an odd one.  Some programs use DCI with PETSCII, which means the
        // string is all lower case except for the last letteR.
        //
        // There's no such thing as "high PETSCII", in the same sense that ASCII or
        // C64 screen codes have it, but I'm giving the method a similar name for
        // the sake of consistency.
        return ConvertC64Petscii((byte)(val & 0x7f));
    }

    //
    // C64 Screen Codes
    //
    // Using character set 2, which includes lower case letters.
    //
    //  $00-1f: lower case letters (PETSCII $40-5f)
    //  $20-3f: same as ASCII (PETSCII $20-3f)
    //  $40-5f: upper case letters (PETSCII $60-7f / $c0-df)
    //  $60-7f: non-ASCII symbols (PETSCII $a0-bf)
    //
    // With the high bit set, character colors are reversed.  The printable ASCII set
    // is [$00,$1b]+$1d+[$20,$3f]+[$41,$5a].  By definition, only printable characters
    // are included in the set, so there are no control codes.
    //
    // For full details, see the chart at https://www.aivosto.com/articles/petscii.pdf
    //

    // Must be declared before sScreenCodeToUnicode; see the note on static
    // initialization order in the PETSCII section.
    private static readonly bool[] sPrintableScreenCode = CreatePrintableScreenCodeMap();

    /// <summary>
    /// Creates a 256-entry table that is true for every C64 screen code that maps to a
    /// printable ASCII glyph: [$00,$1b], $1d, [$20,$3f], and [$41,$5a].
    /// </summary>
    private static bool[] CreatePrintableScreenCodeMap() {
        bool[] map = new bool[256];
        for (int i = 0x00; i <= 0x1b; i++) {
            map[i] = true;
        }
        map[0x1d] = true;
        for (int i = 0x20; i <= 0x3f; i++) {
            map[i] = true;
        }
        for (int i = 0x41; i <= 0x5a; i++) {
            map[i] = true;
        }
        return map;
    }

    /// <summary>
    /// Returns true if the C64 screen code maps to a printable ASCII glyph.
    /// </summary>
    public static bool IsPrintableC64ScreenCode(byte val) {
        return sPrintableScreenCode[val];
    }

    /// <summary>
    /// Returns true if the C64 screen code maps to a printable ASCII glyph.  Screen codes
    /// have no control codes, so this is the same set as IsPrintableC64ScreenCode.
    /// </summary>
    public static bool IsExtendedC64ScreenCode(byte val) {
        return sPrintableScreenCode[val];
    }

    private static readonly char[] sScreenCodeToUnicode = CreateScreenCodeToUnicodeMap();

    /// <summary>
    /// Creates a 256-entry table that maps each C64 screen code to the equivalent
    /// ASCII character, or to UNPRINTABLE_CHAR if there isn't one.
    /// </summary>
    private static char[] CreateScreenCodeToUnicodeMap() {
        char[] map = new char[256];
        for (int val = 0; val < 256; val++) {
            char ch;
            if (val == 0x00 || val == 0x1b || val == 0x1d) {
                ch = (char)(val + 0x40);    // '@', '[', ']'
            } else if (val >= 0x01 && val <= 0x1a) {
                ch = (char)(val + 0x60);    // lower case
            } else if (val >= 0x20 && val <= 0x3f) {
                ch = (char)(val);           // numbers/symbols
            } else if (val >= 0x41 && val <= 0x5a) {
                ch = (char)(val);           // upper case
            } else {
                // Confirm that the conversion table agrees with the inclusion test.
                Debug.Assert(!IsPrintableC64ScreenCode((byte)val));
                ch = UNPRINTABLE_CHAR;
            }
            map[val] = ch;
        }
        return map;
    }

    /// <summary>
    /// Converts a C64 screen code to a char.
    /// </summary>
    /// <returns>The ASCII equivalent, or UNPRINTABLE_CHAR if the value doesn't map to a
    ///   printable ASCII glyph.  Values with the high bit set are not accepted.</returns>
    public static char ConvertC64ScreenCode(byte val) {
        return sScreenCodeToUnicode[val];
    }

    /// <summary>
    /// Converts a C64 screen code to a char, ignoring the high bit.
    /// </summary>
    /// <returns>The ASCII equivalent of the low seven bits, or UNPRINTABLE_CHAR.</returns>
    public static char ConvertLowAndHighC64ScreenCode(byte val) {
        return ConvertC64ScreenCode((byte)(val & 0x7f));
    }
}
|
|
}
|