From e3c9bc96bc8889c48fb896a864da43243de9949d Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Sat, 2 Oct 2021 22:39:16 -0500 Subject: [PATCH] Add implementation of the functions. --- make | 4 +- uchar.asm | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++ uchar.macros | 133 +++++++++++++++++++++++++++++++++ 3 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 uchar.asm create mode 100644 uchar.macros diff --git a/make b/make index 45eaf5b..0976585 100644 --- a/make +++ b/make @@ -19,7 +19,7 @@ if {#} == 0 unset exit end - for i in cc ctype string stdlib time setjmp orca fcntl vars toolglue signal int64 fenv fpextra math2 locale + for i in cc ctype string stdlib time setjmp orca fcntl vars toolglue signal int64 fenv fpextra math2 locale uchar Newer obj/{i}.a {i}.asm if {Status} != 0 set exit on @@ -40,7 +40,7 @@ delete orcalib set list vars.a assert.a cc.a setjmp.a ctype.a string.a stdlib.a set list {list} time.a signal.a toolglue.a orca.a fcntl.a stdio.a int64.a -set list {list} fenv.a fpextra.a math2.a locale.a +set list {list} fenv.a fpextra.a math2.a locale.a uchar.a for i in {list} echo makelib orcalib +obj/{i} makelib orcalib +obj/{i} diff --git a/uchar.asm b/uchar.asm new file mode 100644 index 0000000..6484fed --- /dev/null +++ b/uchar.asm @@ -0,0 +1,203 @@ + keep obj/uchar + mcopy uchar.macros + case on + +**************************************************************** +* +* UChar - Unicode utilities +* +* This code implements conversions to and from Unicode. +* It assumes the multibyte character set is Mac OS Roman. +* +**************************************************************** +* +uchar private + copy equates.asm + end + +**************************************************************** +* +* size_t mbrtoc16(char16_t * pc16, const char * s, size_t n, +* mbstate_t * ps); +* +* size_t mbrtoc32(char32_t * pc32, const char * s, size_t n, +* mbstate_t * ps); +* +* Convert a multibyte character to UTF-16 or UTF-32. 
+*
+*  Inputs:
+*        pc16 or pc32 - pointer to output location
+*        s - pointer to multibyte character
+*        n - maximum number of bytes to examine
+*        ps - conversion state (not referenced by this code)
+*
+*  Outputs:
+*        *pc16 or *pc32 - UTF-16 or UTF-32 code unit
+*        Returns number of bytes in multibyte character,
+*        0 for null character, or (size_t)(-2) if n is 0.
+*
+****************************************************************
+*
+mbrtoc16 start
+         clv                      v flag clear => doing mbrtoc16
+         bra   csub
+
+mbrtoc32 entry
+         sep   #$40               v flag set => doing mbrtoc32
+
+csub     csubroutine (4:pc16,4:s,4:n,4:ps),0
+
+         lda   s                  if s == NULL
+         ora   s+2
+         bne   check_n
+         stz   n                    call is equivalent to
+         stz   n+2                  mbrtoc16(NULL, "", 1, ps),
+         bra   ret                  so return 0
+check_n  lda   n                  if n == 0
+         ora   n+2
+         bne   getchar
+         dec   a                    return (size_t)(-2):
+         sta   n+2                    high word = $FFFF
+         dec   a
+         sta   n                      low word = $FFFE
+         bra   ret
+getchar  ldy   #1                 assume return value is 1
+         lda   [s]                load character *s
+         and   #$00ff               (keep only the low byte)
+         bne   set_rv             if *s == '\0'
+         dey                        return value is 0
+set_rv   sty   n                  set return value (n is reused for it)
+         stz   n+2
+         cmp   #$0080             if *s is an ASCII character
+         blt   output               store it as-is
+         asl   a                  else
+         and   #$00FF               A = (*s - $80) * 2, the table offset
+         tax
+         lda   >macRomanToUCS,x     convert it to Unicode
+output   ldx   pc16               if pc16 != NULL (X used so A is kept)
+         bne   storeit
+         ldx   pc16+2
+         beq   ret
+storeit  sta   [pc16]               store result to *pc16
+         bvc   ret                  if doing mbrtoc32 (v flag set)
+         lda   #0
+         ldy   #2
+         sta   [pc16],y               store 0 as high word of result
+
+ret      creturn 4:n
+         end
+
+
+****************************************************************
+*
+*  size_t c16rtomb(char * s, char16_t c16, mbstate_t * ps);
+*
+*  Convert a UTF-16 code unit to a multibyte character.
+*
+*  Inputs:
+*        s - pointer to output location
+*        c16 - UTF-16 code unit
+*        ps - conversion state
+*
+*  Outputs:
+*        *s - converted character
+*        Returns bytes stored, or -1 with errno = EILSEQ on error.
+*
+****************************************************************
+*
+c16rtomb start
+
+         csubroutine (4:s,2:c16,4:ps),0
+
+         lda   s                  if s == NULL, call is equivalent to
+         ora   s+2                  c16rtomb(internal_buf, 0, ps),
+         beq   return_1             so return 1
+         lda   c16                if c16 is an ASCII character
+         cmp   #$0080
+         blt   storeit              store it as-is
+         short I                  8-bit X wraps to 0 after 128 entries
+         ldx   #0
+cvt_loop lda   >macRomanToUCS,x   for each entry in macRomanToUCS
+         cmp   c16                  if it matches c16
+         beq   gotit                  break and handle the mapping
+         inx                        step X to the next 2-byte entry
+         inx
+         bne   cvt_loop             loop until X wraps back to 0
+         lda   #EILSEQ            if no mapping was found
+         sta   >errno               errno = EILSEQ
+         lda   #-1                  return -1
+         sta   s                    (s is reused for the return value)
+         sta   s+2
+         long  I
+         bra   ret
+gotit    longi off
+         txa                      if we found a mapping
+         lsr   a                    compute the MacRoman character
+         ora   #$0080               ($80 + index of the table entry)
+storeit  short M                  store the character
+         sta   [s]
+         long  M,I
+return_1 lda   #1                 return 1
+         sta   s
+         stz   s+2
+
+ret      creturn 4:s
+         end
+
+
+****************************************************************
+*
+*  size_t c32rtomb(char * s, char32_t c32, mbstate_t * ps);
+*
+*  Convert a UTF-32 code unit to a multibyte character.
+*
+*  Inputs:
+*        s - pointer to output location
+*        c32 - UTF-32 code unit
+*        ps - conversion state
+*
+*  Outputs:
+*        *s - converted character
+*        Returns number of bytes stored, or -1 for error.
+*
+****************************************************************
+*
+c32rtomb start
+
+         lda   10,s               if char is outside the BMP
+         beq   fixstack             (high word of char is nonzero)
+         lda   #$FFFD               substitute REPLACEMENT CHARACTER
+         bra   fs2
+
+fixstack lda   8,s                adjust stack for call to c16rtomb
+fs2      sta   10,s                 2-byte char goes where high word was
+         lda   6,s                  move s up by two bytes
+         sta   8,s
+         lda   4,s
+         sta   6,s
+         lda   2,s                  move return address up by two bytes
+         sta   4,s
+         pla                        (pla also drops the two spare bytes)
+         sta   1,s
+         jml   c16rtomb           do the equivalent c16rtomb call
+         end
+
+
+macRomanToUCS private
+         dc    i2'$00C4, $00C5, $00C7, $00C9, $00D1, $00D6, $00DC, $00E1'
+         dc    i2'$00E0, $00E2, $00E4, $00E3, $00E5, $00E7, $00E9, $00E8'
+         dc    i2'$00EA, $00EB, $00ED, $00EC, $00EE, $00EF, $00F1, $00F3'
+         dc    i2'$00F2, $00F4, $00F6, $00F5, $00FA, $00F9, $00FB, $00FC'
+         dc    i2'$2020, $00B0, $00A2, $00A3, $00A7, $2022, $00B6, $00DF'
+         dc    i2'$00AE, $00A9, $2122, $00B4, $00A8, $2260, $00C6, $00D8'
+         dc    i2'$221E, $00B1, $2264, $2265, $00A5, $00B5, $2202, $2211'
+         dc    i2'$220F, $03C0, $222B, $00AA, $00BA, $03A9, $00E6, $00F8'
+         dc    i2'$00BF, $00A1, $00AC, $221A, $0192, $2248, $2206, $00AB'
+         dc    i2'$00BB, $2026, $00A0, $00C0, $00C3, $00D5, $0152, $0153'
+         dc    i2'$2013, $2014, $201C, $201D, $2018, $2019, $00F7, $25CA'
+         dc    i2'$00FF, $0178, $2044, $00A4, $2039, $203A, $FB01, $FB02'
+         dc    i2'$2021, $00B7, $201A, $201E, $2030, $00C2, $00CA, $00C1'
+         dc    i2'$00CB, $00C8, $00CD, $00CE, $00CF, $00CC, $00D3, $00D4'
+         dc    i2'$F8FF, $00D2, $00DA, $00DB, $00D9, $0131, $02C6, $02DC'
+         dc    i2'$00AF, $02D8, $02D9, $02DA, $00B8, $02DD, $02DB, $02C7'
+         end
diff --git a/uchar.macros b/uchar.macros
new file mode 100644
index 0000000..e402109
--- /dev/null
+++ b/uchar.macros
@@ -0,0 +1,133 @@
+ MACRO
+&lab csubroutine &parms,&work
+&lab anop
+ aif c:&work,.a
+ lclc &work
+&work setc 0
+.a
+ gbla &totallen
+ gbla &worklen
+&worklen seta &work
+&totallen seta 0
+ aif c:&parms=0,.e
+ lclc &len
+ lclc &p
+ lcla &i
+&i seta 1
+.b
+&p setc &parms(&i)
+&len amid &p,2,1
+ aif "&len"=":",.c
+&len amid &p,1,2
+&p amid &p,4,l:&p-3
+ ago .d
+.c
+&len amid &p,1,1
+&p amid &p,3,l:&p-2
+.d
+&p equ &totallen+4+&work
+&totallen seta &totallen+&len
+&i seta &i+1
+ aif &i<=c:&parms,^b
+.e
+ tsc
+ aif &work=0,.f
+ sec
+ sbc #&work
+ tcs
+.f
+ phd
+ tcd
+ mend
+ MACRO
+&lab creturn &r
+&lab anop
+ lclc &len
+ aif c:&r,.a
+ lclc &r
+&r setc 0
+&len setc 0
+ ago .h
+.a
+&len amid &r,2,1
+ aif "&len"=":",.b
+&len amid &r,1,2
+&r amid &r,4,l:&r-3
+ ago .c
+.b
+&len amid &r,1,1
+&r amid &r,3,l:&r-2
+.c
+ aif &len<>2,.d
+ ldy &r
+ ago .h
+.d
+ aif &len<>4,.e
+ ldx &r+2
+ ldy &r
+ ago .h
+.e
+ aif &len<>10,.g
+ ldy #&r
+ ldx #^&r
+ ago .h
+.g
+ mnote 'Not a valid return length',16
+ mexit
+.h
+ aif &totallen=0,.i
+ lda &worklen+2
+ sta &worklen+&totallen+2
+ lda &worklen+1
+ sta &worklen+&totallen+1
+.i
+ pld
+ tsc
+ clc
+ adc #&worklen+&totallen
+ tcs
+ aif &len=0,.j
+ tya
+.j
+ rtl
+ mend
+ macro
+&l long &a,&b
+ lclb &i
+ lclb &m
+&a amid &a,1,1
+&m setb ("&a"="M").or.("&a"="m")
+&i setb ("&a"="I").or.("&a"="i")
+ aif c:&b=0,.a
+&b amid &b,1,1
+&m setb ("&b"="M").or.("&b"="m").or.&m
+&i setb ("&b"="I").or.("&b"="i").or.&i
+.a
+&l rep #&m*32+&i*16
+ aif .not.&m,.b
+ longa on
+.b
+ aif .not.&i,.c
+ longi on
+.c
+ mend
+ macro
+&l short &a,&b
+ lclb &i
+ lclb &m
+&a amid &a,1,1
+&m setb ("&a"="M").or.("&a"="m")
+&i setb ("&a"="I").or.("&a"="i")
+ aif c:&b=0,.a
+&b amid &b,1,1
+&m setb ("&b"="M").or.("&b"="m").or.&m
+&i setb ("&b"="I").or.("&b"="i").or.&i
+.a
+&l sep #&m*32+&i*16
+ aif .not.&m,.b
+ longa off
+.b
+ aif .not.&i,.c
+ longi off
+.c
+ mend