mirror of
https://github.com/irmen/prog8.git
synced 2025-02-16 07:31:48 +00:00
half width katakana conversion
This commit is contained in:
parent
b4facaeb3c
commit
8f6eaeac2c
@ -6,6 +6,61 @@ import com.github.michaelbull.result.Result
|
||||
import java.io.CharConversionException
|
||||
import java.nio.charset.Charset
|
||||
|
||||
|
||||
/**
 * Converts full-width ("zenkaku") kana into half-width ("hankaku") katakana.
 *
 * Hiragana is first folded onto the matching full-width katakana, and every
 * full-width katakana is then replaced by its half-width form. Half-width
 * katakana has no precomposed voiced characters, so a voiced kana such as
 * GA becomes TWO characters (KA followed by the half-width dakuten).
 * All other characters pass through unchanged.
 */
object JapaneseCharacterConverter {
    // adapted from https://github.com/raminduw/Japanese-Character-Converter

    // Half-width sound marks that follow a base kana. The half-width forms are
    // written as \u escapes on purpose: Unicode compatibility normalisation (NFKC)
    // of this source file would otherwise silently fold them back to full-width
    // characters and turn the whole conversion into a no-op.
    private const val DAKUTEN = "\uFF9E"
    private const val HANDAKUTEN = "\uFF9F"

    // Full-width katakana small A .. small KE: 86 consecutive code points.
    private val ZENKAKU_KATAKANA = '\u30A1'..'\u30F6'

    // Full-width hiragana small a .. small ke: the same 86 kana in the same order.
    private val ZENKAKU_HIRAGANA = '\u3041'..'\u3096'

    // Half-width replacement for each character of ZENKAKU_KATAKANA, indexed by
    // (code point - U+30A1). The comments show the full-width kana being mapped.
    private val HANKAKU_KATAKANA = arrayOf(
        // ァ ア ィ イ ゥ ウ ェ エ ォ オ
        "\uFF67", "\uFF71", "\uFF68", "\uFF72", "\uFF69", "\uFF73", "\uFF6A", "\uFF74", "\uFF6B", "\uFF75",
        // カ ガ キ ギ ク グ ケ ゲ コ ゴ
        "\uFF76", "\uFF76" + DAKUTEN, "\uFF77", "\uFF77" + DAKUTEN, "\uFF78", "\uFF78" + DAKUTEN,
        "\uFF79", "\uFF79" + DAKUTEN, "\uFF7A", "\uFF7A" + DAKUTEN,
        // サ ザ シ ジ ス ズ セ ゼ ソ ゾ
        "\uFF7B", "\uFF7B" + DAKUTEN, "\uFF7C", "\uFF7C" + DAKUTEN, "\uFF7D", "\uFF7D" + DAKUTEN,
        "\uFF7E", "\uFF7E" + DAKUTEN, "\uFF7F", "\uFF7F" + DAKUTEN,
        // タ ダ チ ヂ ッ ツ ヅ テ デ ト ド
        "\uFF80", "\uFF80" + DAKUTEN, "\uFF81", "\uFF81" + DAKUTEN, "\uFF6F", "\uFF82", "\uFF82" + DAKUTEN,
        "\uFF83", "\uFF83" + DAKUTEN, "\uFF84", "\uFF84" + DAKUTEN,
        // ナ ニ ヌ ネ ノ
        "\uFF85", "\uFF86", "\uFF87", "\uFF88", "\uFF89",
        // ハ バ パ ヒ ビ ピ フ ブ プ ヘ ベ ペ ホ ボ ポ
        "\uFF8A", "\uFF8A" + DAKUTEN, "\uFF8A" + HANDAKUTEN,
        "\uFF8B", "\uFF8B" + DAKUTEN, "\uFF8B" + HANDAKUTEN,
        "\uFF8C", "\uFF8C" + DAKUTEN, "\uFF8C" + HANDAKUTEN,
        "\uFF8D", "\uFF8D" + DAKUTEN, "\uFF8D" + HANDAKUTEN,
        "\uFF8E", "\uFF8E" + DAKUTEN, "\uFF8E" + HANDAKUTEN,
        // マ ミ ム メ モ
        "\uFF8F", "\uFF90", "\uFF91", "\uFF92", "\uFF93",
        // ャ ヤ ュ ユ ョ ヨ
        "\uFF6C", "\uFF94", "\uFF6D", "\uFF95", "\uFF6E", "\uFF96",
        // ラ リ ル レ ロ
        "\uFF97", "\uFF98", "\uFF99", "\uFF9A", "\uFF9B",
        // ヮ ワ ヰ ヱ ヲ ン  (small WA, WI and WE have no half-width form: nearest kana is used)
        "\uFF9C", "\uFF9C", "\uFF72", "\uFF74", "\uFF66", "\uFF9D",
        // ヴ ヵ ヶ  (small KA and small KE fall back to the regular-size kana)
        "\uFF73" + DAKUTEN, "\uFF76", "\uFF79"
    )

    init {
        // The table is indexed by code point offset, so it must cover the range exactly.
        check(HANKAKU_KATAKANA.size == ZENKAKU_KATAKANA.last - ZENKAKU_KATAKANA.first + 1) {
            "half-width katakana table does not match the full-width katakana range"
        }
    }

    /** Maps a full-width hiragana character onto the full-width katakana at the same offset; others are returned as-is. */
    private fun hiraganaToZenkakuKatakana(c: Char): Char =
        if (c in ZENKAKU_HIRAGANA) ZENKAKU_KATAKANA.first + (c - ZENKAKU_HIRAGANA.first) else c

    /** Maps one full-width katakana character to its half-width form (one or two chars); others are returned as-is. */
    private fun zenkakuKatakanaToHankakuKatakana(c: Char): String =
        if (c in ZENKAKU_KATAKANA) HANKAKU_KATAKANA[c - ZENKAKU_KATAKANA.first] else c.toString()

    /**
     * Returns [s] with every full-width hiragana and katakana character replaced by
     * half-width katakana. The result can be longer than the input because voiced
     * kana expand to a base character plus a sound mark.
     */
    fun zenkakuKatakanaToHankakuKatakana(s: String): String = buildString {
        for (c in s)
            append(zenkakuKatakanaToHankakuKatakana(hiraganaToZenkakuKatakana(c)))
    }
}
|
||||
|
||||
object KatakanaEncoding {
|
||||
val charset: Charset = Charset.forName("JIS_X0201")
|
||||
|
||||
@ -13,11 +68,6 @@ object KatakanaEncoding {
|
||||
return try {
|
||||
val mapped = str.map { chr ->
|
||||
when (chr) {
|
||||
// TODO: Convert regular katakana to halfwidth katakana (java lib doesn't do that for us
|
||||
// and simply returns '?' upon reaching a regular katakana character)
|
||||
// NOTE: we probably need to somehow do that before we reach this `when`,
|
||||
// as one regular katakana character often results in two HW katakana characters
|
||||
// due to differences in how diacritics are handled.
|
||||
|
||||
'\u0000' -> 0u
|
||||
'\u00a0' -> 0xa0u // $a0 isn't technically a part of JIS X 0201 spec, and so we need to handle this ourselves
|
||||
|
@ -91,7 +91,7 @@ class ConstantFoldingOptimizer(private val program: Program, private val errors:
|
||||
program.encoding.encodeString(leftString.value, leftString.encoding) + program.encoding.encodeString(rightString.value, rightString.encoding),
|
||||
leftString.encoding)
|
||||
}
|
||||
val concatStr = StringLiteral(concatenated, leftString.encoding, expr.position)
|
||||
val concatStr = StringLiteral.create(concatenated, leftString.encoding, expr.position)
|
||||
return listOf(IAstModification.ReplaceNode(expr, concatStr, parent))
|
||||
}
|
||||
else if(expr.operator=="*" && rightconst!=null && expr.left is StringLiteral) {
|
||||
@ -99,7 +99,7 @@ class ConstantFoldingOptimizer(private val program: Program, private val errors:
|
||||
val part = expr.left as StringLiteral
|
||||
if(part.value.isEmpty())
|
||||
errors.warn("resulting string has length zero", part.position)
|
||||
val newStr = StringLiteral(part.value.repeat(rightconst.number.toInt()), part.encoding, expr.position)
|
||||
val newStr = StringLiteral.create(part.value.repeat(rightconst.number.toInt()), part.encoding, expr.position)
|
||||
return listOf(IAstModification.ReplaceNode(expr, newStr, parent))
|
||||
}
|
||||
}
|
||||
|
@ -263,7 +263,7 @@ sub iso16() {
|
||||
}
|
||||
|
||||
sub kata() {
|
||||
; -- switch to katakana character set
|
||||
; -- switch to katakana character set (requires rom 48+)
|
||||
cbm.CHROUT($0f) ; iso mode
|
||||
cx16.screen_set_charset(12, 0) ; charset
|
||||
}
|
||||
|
@ -75,17 +75,6 @@ private fun oneFloatArgOutputFloat(args: List<Expression>, position: Position, p
|
||||
return NumericLiteral(DataType.FLOAT, function(constval.number), args[0].position)
|
||||
}
|
||||
|
||||
private fun collectionArgBoolResult(args: List<Expression>, position: Position, program: Program, function: (arg: List<Double>)->Boolean): NumericLiteral {
|
||||
if(args.size!=1)
|
||||
throw SyntaxError("builtin function requires one non-scalar argument", position)
|
||||
|
||||
val array= args[0] as? ArrayLiteral ?: throw NotConstArgumentException()
|
||||
val constElements = array.value.map{it.constValue(program)?.number}
|
||||
if(constElements.contains(null))
|
||||
throw NotConstArgumentException()
|
||||
return NumericLiteral.fromBoolean(function(constElements.mapNotNull { it }), args[0].position)
|
||||
}
|
||||
|
||||
private fun builtinAbs(args: List<Expression>, position: Position, program: Program): NumericLiteral {
|
||||
// 1 arg, type = int, result type= uword
|
||||
if(args.size!=1)
|
||||
@ -138,6 +127,8 @@ private fun builtinLen(args: List<Expression>, position: Position, program: Prog
|
||||
return NumericLiteral.optimalInteger(arraySize, position)
|
||||
if(args[0] is ArrayLiteral)
|
||||
return NumericLiteral.optimalInteger((args[0] as ArrayLiteral).value.size, position)
|
||||
if(args[0] is StringLiteral)
|
||||
return NumericLiteral.optimalInteger((args[0] as StringLiteral).value.length, position)
|
||||
if(args[0] !is IdentifierReference)
|
||||
throw SyntaxError("len argument should be an identifier", position)
|
||||
val target = (args[0] as IdentifierReference).targetVarDecl(program)
|
||||
|
@ -177,7 +177,7 @@ internal class AstIdentifiersChecker(private val errors: IErrorReporter,
|
||||
'_'
|
||||
}.joinToString("")
|
||||
val textEncoding = (call as Node).definingModule.textEncoding
|
||||
call.args[0] = StringLiteral(processed, textEncoding, name.position)
|
||||
call.args[0] = StringLiteral.create(processed, textEncoding, name.position)
|
||||
call.args[0].linkParents(call as Node)
|
||||
}
|
||||
}
|
||||
|
@ -96,11 +96,11 @@ class TestNumericLiteral: FunSpec({
|
||||
}
|
||||
|
||||
test("testEqualsRef") {
|
||||
(StringLiteral("hello", Encoding.PETSCII, dummyPos) == StringLiteral("hello", Encoding.PETSCII, dummyPos)) shouldBe true
|
||||
(StringLiteral("hello", Encoding.PETSCII, dummyPos) != StringLiteral("bye", Encoding.PETSCII, dummyPos)) shouldBe true
|
||||
(StringLiteral("hello", Encoding.SCREENCODES, dummyPos) == StringLiteral("hello", Encoding.SCREENCODES, dummyPos)) shouldBe true
|
||||
(StringLiteral("hello", Encoding.SCREENCODES, dummyPos) != StringLiteral("bye", Encoding.SCREENCODES, dummyPos)) shouldBe true
|
||||
(StringLiteral("hello", Encoding.SCREENCODES, dummyPos) != StringLiteral("hello", Encoding.PETSCII, dummyPos)) shouldBe true
|
||||
(StringLiteral.create("hello", Encoding.PETSCII, dummyPos) == StringLiteral.create("hello", Encoding.PETSCII, dummyPos)) shouldBe true
|
||||
(StringLiteral.create("hello", Encoding.PETSCII, dummyPos) != StringLiteral.create("bye", Encoding.PETSCII, dummyPos)) shouldBe true
|
||||
(StringLiteral.create("hello", Encoding.SCREENCODES, dummyPos) == StringLiteral.create("hello", Encoding.SCREENCODES, dummyPos)) shouldBe true
|
||||
(StringLiteral.create("hello", Encoding.SCREENCODES, dummyPos) != StringLiteral.create("bye", Encoding.SCREENCODES, dummyPos)) shouldBe true
|
||||
(StringLiteral.create("hello", Encoding.SCREENCODES, dummyPos) != StringLiteral.create("hello", Encoding.PETSCII, dummyPos)) shouldBe true
|
||||
|
||||
val lvOne = NumericLiteral(DataType.UBYTE, 1.0, dummyPos)
|
||||
val lvTwo = NumericLiteral(DataType.UBYTE, 2.0, dummyPos)
|
||||
|
@ -3,15 +3,18 @@ package prog8tests
|
||||
import com.github.michaelbull.result.Ok
|
||||
import com.github.michaelbull.result.expectError
|
||||
import com.github.michaelbull.result.getOrElse
|
||||
import io.kotest.assertions.throwables.shouldThrow
|
||||
import io.kotest.assertions.withClue
|
||||
import io.kotest.core.spec.style.FunSpec
|
||||
import io.kotest.matchers.shouldBe
|
||||
import io.kotest.matchers.shouldNotBe
|
||||
import prog8.ast.expressions.CharLiteral
|
||||
import prog8.ast.expressions.NumericLiteral
|
||||
import prog8.ast.expressions.StringLiteral
|
||||
import prog8.ast.statements.Assignment
|
||||
import prog8.ast.statements.VarDecl
|
||||
import prog8.code.core.Encoding
|
||||
import prog8.code.core.Position
|
||||
import prog8.code.core.unescape
|
||||
import prog8.code.target.C64Target
|
||||
import prog8.code.target.Cx16Target
|
||||
@ -21,6 +24,7 @@ import prog8.code.target.encodings.IsoEncoding
|
||||
import prog8.code.target.encodings.PetsciiEncoding
|
||||
import prog8tests.helpers.ErrorReporterForTests
|
||||
import prog8tests.helpers.compileText
|
||||
import java.io.CharConversionException
|
||||
|
||||
|
||||
class TestStringEncodings: FunSpec({
|
||||
@ -231,6 +235,22 @@ class TestStringEncodings: FunSpec({
|
||||
}
|
||||
}
|
||||
|
||||
context("kata") {
|
||||
test("kata translation to half width glyphs") {
|
||||
val orig = "カ が ガ"
|
||||
orig.length shouldBe 5
|
||||
val str = StringLiteral.create(orig, Encoding.KATAKANA, Position.DUMMY)
|
||||
str.value.length shouldBe 7
|
||||
|
||||
val character = CharLiteral.create('カ', Encoding.KATAKANA, Position.DUMMY)
|
||||
character.value shouldBe 'カ'
|
||||
|
||||
shouldThrow<CharConversionException> {
|
||||
CharLiteral.create('ガ', Encoding.KATAKANA, Position.DUMMY)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("special pass-through") {
|
||||
val passthroughEscaped= """\x00\x1b\x99\xff"""
|
||||
val passthrough = passthroughEscaped.unescape()
|
||||
@ -299,12 +319,22 @@ class TestStringEncodings: FunSpec({
|
||||
str string1 = "default"
|
||||
str string2 = sc:"screencodes"
|
||||
str string3 = iso:"iso"
|
||||
str string4 = petscii:"petscii"
|
||||
str string4 = iso5:"Хозяин и Работник"
|
||||
str string5 = iso16:"zażółć gęślą jaźń"
|
||||
str string6 = cp437:"≈ IBM Pc ≈ ♂♀♪☺¶"
|
||||
str string7 = petscii:"petscii"
|
||||
str string8 = atascii:"atascii"
|
||||
str string9 = kata:"クジン。 # が # ガ"
|
||||
|
||||
ubyte char1 = 'd'
|
||||
ubyte char2 = sc:'s'
|
||||
ubyte char3 = iso:'i'
|
||||
ubyte char4 = petscii:'p'
|
||||
ubyte char4 = iso5:'и'
|
||||
ubyte char5 = iso16:'ł'
|
||||
ubyte char6 = cp437:'☺'
|
||||
ubyte char7 = petscii:'p'
|
||||
ubyte char8 = atascii:'p'
|
||||
ubyte char9 = kata:'カ'
|
||||
|
||||
sub start() {
|
||||
}
|
||||
|
@ -612,9 +612,9 @@ class TestProg8Parser: FunSpec( {
|
||||
}
|
||||
|
||||
test("testCharLiteralConstValue") {
|
||||
val char1 = CharLiteral('A', Encoding.PETSCII, Position.DUMMY)
|
||||
val char2 = CharLiteral('z', Encoding.SCREENCODES, Position.DUMMY)
|
||||
val char3 = CharLiteral('_', Encoding.ISO, Position.DUMMY)
|
||||
val char1 = CharLiteral.create('A', Encoding.PETSCII, Position.DUMMY)
|
||||
val char2 = CharLiteral.create('z', Encoding.SCREENCODES, Position.DUMMY)
|
||||
val char3 = CharLiteral.create('_', Encoding.ISO, Position.DUMMY)
|
||||
|
||||
val program = Program("test", DummyFunctions, DummyMemsizer, AsciiStringEncoder)
|
||||
char1.constValue(program).number.toInt() shouldBe 65
|
||||
@ -640,8 +640,8 @@ class TestProg8Parser: FunSpec( {
|
||||
(ten <= ten) shouldBe true
|
||||
(ten < ten) shouldBe false
|
||||
|
||||
val abc = StringLiteral("abc", Encoding.PETSCII, Position.DUMMY)
|
||||
val abd = StringLiteral("abd", Encoding.PETSCII, Position.DUMMY)
|
||||
val abc = StringLiteral.create("abc", Encoding.PETSCII, Position.DUMMY)
|
||||
val abd = StringLiteral.create("abd", Encoding.PETSCII, Position.DUMMY)
|
||||
abc shouldBe abc
|
||||
(abc!=abd) shouldBe true
|
||||
(abc!=abc) shouldBe false
|
||||
|
@ -10,6 +10,8 @@ import prog8.ast.statements.*
|
||||
import prog8.ast.walk.AstWalker
|
||||
import prog8.ast.walk.IAstVisitor
|
||||
import prog8.code.core.*
|
||||
import prog8.code.target.encodings.JapaneseCharacterConverter
|
||||
import java.io.CharConversionException
|
||||
import java.util.*
|
||||
import kotlin.math.abs
|
||||
import kotlin.math.floor
|
||||
@ -713,7 +715,7 @@ class NumericLiteral(val type: DataType, // only numerical types allowed
|
||||
}
|
||||
}
|
||||
|
||||
class CharLiteral(val value: Char,
|
||||
class CharLiteral private constructor(val value: Char,
|
||||
var encoding: Encoding,
|
||||
override val position: Position) : Expression() {
|
||||
override lateinit var parent: Node
|
||||
@ -723,9 +725,20 @@ class CharLiteral(val value: Char,
|
||||
}
|
||||
|
||||
companion object {
|
||||
/**
 * Builds a [CharLiteral] for [character] in the given [encoding].
 *
 * For [Encoding.KATAKANA] the character is first run through
 * [JapaneseCharacterConverter.zenkakuKatakanaToHankakuKatakana]; the literal
 * then holds the converted character. Any other encoding stores the
 * character unchanged.
 *
 * @throws CharConversionException when the katakana conversion does not yield exactly one character
 */
fun create(character: Char, encoding: Encoding, position: Position): CharLiteral {
    // Only katakana needs preprocessing; everything else is stored verbatim.
    if(encoding!=Encoding.KATAKANA)
        return CharLiteral(character, encoding, position)

    val halfWidth = JapaneseCharacterConverter.zenkakuKatakanaToHankakuKatakana(character.toString())
    // A character literal can only hold a single char, so a multi-char conversion result is rejected.
    if(halfWidth.length!=1)
        throw CharConversionException("character literal encodes into multiple bytes at $position")
    return CharLiteral(halfWidth[0], encoding, position)
}
|
||||
|
||||
fun fromEscaped(raw: String, encoding: Encoding, position: Position): CharLiteral {
|
||||
val unescaped = raw.unescape()
|
||||
return CharLiteral(unescaped[0], encoding, position)
|
||||
return create(unescaped[0], encoding, position)
|
||||
}
|
||||
}
|
||||
|
||||
@ -756,7 +769,7 @@ class CharLiteral(val value: Char,
|
||||
}
|
||||
}
|
||||
|
||||
class StringLiteral(val value: String,
|
||||
class StringLiteral private constructor(val value: String,
|
||||
var encoding: Encoding,
|
||||
override val position: Position) : Expression() {
|
||||
override lateinit var parent: Node
|
||||
@ -766,10 +779,15 @@ class StringLiteral(val value: String,
|
||||
}
|
||||
|
||||
companion object {
|
||||
fun fromEscaped(raw: String, encoding: Encoding, position: Position): StringLiteral {
|
||||
val unescaped = raw.unescape()
|
||||
return StringLiteral(unescaped, encoding, position)
|
||||
/**
 * Builds a [StringLiteral] for [str] in the given [encoding].
 *
 * For [Encoding.KATAKANA] the text is first run through
 * [JapaneseCharacterConverter.zenkakuKatakanaToHankakuKatakana], so the stored
 * value may differ from (and be longer than) [str]. Any other encoding stores
 * the text unchanged.
 */
fun create(str: String, encoding: Encoding, position: Position): StringLiteral {
    val text =
        if (encoding == Encoding.KATAKANA)
            JapaneseCharacterConverter.zenkakuKatakanaToHankakuKatakana(str)
        else
            str
    return StringLiteral(text, encoding, position)
}
|
||||
|
||||
/** Unescapes [raw] and builds the literal through [create], so encoding-specific preprocessing is applied to the unescaped text. */
fun fromEscaped(raw: String, encoding: Encoding, position: Position): StringLiteral {
    val unescaped = raw.unescape()
    return create(unescaped, encoding, position)
}
|
||||
}
|
||||
|
||||
override val isSimple = true
|
||||
|
@ -555,7 +555,7 @@ Here are examples of the various encodings:
|
||||
- ``iso16:"zażółć gęślą jaźń"`` string in iso-8859-16 encoding (Eastern Europe)
|
||||
- ``atascii:"I am Atari!"`` string in "atascii" encoding (Atari 8-bit)
|
||||
- ``cp437:"≈ IBM Pc ≈ ♂♀♪☺¶"`` string in "cp437" encoding (IBM PC codepage 437)
|
||||
- ``kata:"アノ ニホンジン ワ ガイコクジン。"`` string in "kata" encoding (Katakana half-width support only for now)
|
||||
- ``kata:"アノ ニホンジン ワ ガイコクジン。 # が # ガ"`` string in "kata" encoding (Katakana)
|
||||
|
||||
|
||||
There are several escape sequences available to put special characters into your string value:
|
||||
@ -976,7 +976,7 @@ where <statements> can be just a single statement or a block again::
|
||||
if_XX {
|
||||
<statements>
|
||||
} else {
|
||||
<alternative statements>
|
||||
<alternative statements>
|
||||
}
|
||||
|
||||
The XX corresponds to one of the processor's branching instructions, so the possibilities are:
|
||||
|
@ -54,8 +54,8 @@ main {
|
||||
sub kata() {
|
||||
txt.kata()
|
||||
repeat 3 txt.nl()
|
||||
write_screencodes(kata:"Katakana hw: アノ ニホンジン ワ ガイコクジン ノ ニホンゴ ガ ジョウズ ダッテ ユッタ。")
|
||||
txt.print(kata:"Katakana hw: アノ ニホンジン ワ ガイコクジン ノ ニホンゴ ガ ジョウズ ダッテ ユッタ。")
|
||||
write_screencodes(kata:"Katakana: アノ ニホンジン ワ ガイコクジン ノ ニホンゴ ガ ジョウズ ダッテ ユッタ。 ## がが ## ガガ")
|
||||
txt.print(kata:"Katakana: アノ ニホンジン ワ ガイコクジン ノ ニホンゴ ガ ジョウズ ダッテ ユッタ。 ## がが ## ガガ")
|
||||
}
|
||||
|
||||
sub wait() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user