Add implementation of the <uchar.h> functions.

This commit is contained in:
Stephen Heumann 2021-10-02 22:39:16 -05:00
parent ae504c6e4f
commit e3c9bc96bc
3 changed files with 338 additions and 2 deletions

4
make
View File

@ -19,7 +19,7 @@ if {#} == 0
unset exit
end
for i in cc ctype string stdlib time setjmp orca fcntl vars toolglue signal int64 fenv fpextra math2 locale
for i in cc ctype string stdlib time setjmp orca fcntl vars toolglue signal int64 fenv fpextra math2 locale uchar
Newer obj/{i}.a {i}.asm
if {Status} != 0
set exit on
@ -40,7 +40,7 @@ delete orcalib
set list vars.a assert.a cc.a setjmp.a ctype.a string.a stdlib.a
set list {list} time.a signal.a toolglue.a orca.a fcntl.a stdio.a int64.a
set list {list} fenv.a fpextra.a math2.a locale.a
set list {list} fenv.a fpextra.a math2.a locale.a uchar.a
for i in {list}
echo makelib orcalib +obj/{i}
makelib orcalib +obj/{i}

203
uchar.asm Normal file
View File

@ -0,0 +1,203 @@
keep obj/uchar
mcopy uchar.macros
case on
****************************************************************
*
* UChar - Unicode utilities
*
* This code implements conversions to and from Unicode.
* It assumes the multibyte character set is Mac OS Roman.
*
****************************************************************
*
uchar private
; This private segment holds no instructions; it only copies in equates.asm.
; NOTE(review): EILSEQ, used by c16rtomb, is not defined anywhere in this
; file and is presumably one of those equates - confirm.
copy equates.asm
end
****************************************************************
*
* size_t mbrtoc16(char16_t * pc16, const char * s, size_t n,
* mbstate_t * ps);
*
* size_t mbrtoc32(char32_t * pc32, const char * s, size_t n,
* mbstate_t * ps);
*
* Convert a multibyte character to UTF-16 or UTF-32.
*
* Inputs:
* pc16 or pc32 - pointer to output location
* s - pointer to multibyte character
* n - maximum number of bytes to examine
* ps - conversion state
*
* Outputs:
* *pc16 or *pc32 - UTF-16 or UTF-32 code unit
* Returns number of bytes in multibyte character,
* 0 for null character (or if s is NULL), or
* (size_t)(-2) if n is 0.
*
****************************************************************
*
mbrtoc16 start
; Two entry points share one body.  The overflow (V) flag records which one
; was used: clear for mbrtoc16, set for mbrtoc32.  It is not tested until
; the bvc near the end, so everything in between has to leave V untouched.
clv v flag clear => doing mbrtoc16
bra csub
mbrtoc32 entry
sep #$40 v flag set => doing mbrtoc32
; Parameter names follow the mbrtoc16 prototype; pc16 also stands for pc32.
; n doubles as the result: it is overwritten with the return value and
; handed to creturn at ret.  ps is declared but never referenced below.
csub csubroutine (4:pc16,4:s,4:n,4:ps),0
lda s if s == NULL
ora s+2
bne check_n
stz n call is equivalent to
stz n+2 mbrtoc16(NULL, "", 1, ps),
bra ret so return 0
check_n lda n if n = 0
ora n+2
bne getchar
; A is 0 here (the branch above was not taken), so the two decrements give
; $FFFF for the high word and then $FFFE for the low word.
dec a return (size_t)(-2)
sta n+2
dec a
sta n
bra ret
getchar ldy #1 assume return value is 1
; The load fetches a full word; the mask keeps only the byte at *s.
lda [s] load character *s
and #$00ff
bne set_rv if *s == '\0'
dey return value is 0
set_rv sty n set return value
stz n+2
cmp #$0080 if *s is an ASCII character
blt output store it as-is
; Table offset = (character - $80) * 2: doubling the character and masking
; the result to 8 bits discards the $80 bit and leaves a byte offset to a
; 2-byte table entry.
asl a else
and #$00FF
tax
lda >macRomanToUCS,x convert it to Unicode
output ldx pc16 if pc16 != NULL
bne storeit
ldx pc16+2
beq ret
storeit sta [pc16] store result to *pc16
; Execution continues past the bvc only for the mbrtoc32 entry (V set):
; the result is then 32 bits wide, so its upper word is zeroed.
bvc ret if doing mbrtoc32
lda #0
ldy #2
sta [pc16],y store 0 as high word of result
ret creturn 4:n
end
****************************************************************
*
* size_t c16rtomb(char * s, char16_t c16, mbstate_t * ps);
*
* Convert a UTF-16 code unit to a multibyte character.
*
* Inputs:
* s - pointer to output location
* c16 - UTF-16 code unit
* ps - conversion state
*
* Outputs:
* *s - converted character
* Returns number of bytes stored, or -1 for error (in
* which case errno is set to EILSEQ).
* If s is NULL, nothing is stored and 1 is returned.
*
****************************************************************
*
c16rtomb start

         csubroutine (4:s,2:c16,4:ps),0

         lda   s                        a NULL s stores nothing and
         ora   s+2                        yields a result of 1
         beq   one_byte

         lda   c16                      code units below $80 are ASCII
         cmp   #$0080                     and map to themselves
         blt   emit

         short I                        8-bit X: the offset wraps to 0
         ldx   #0                         after the 128th table entry
scan     lda   >macRomanToUCS,x         look for c16 in the table
         cmp   c16
         beq   found
         inx                            step to the next 2-byte entry
         inx
         bne   scan                     until the offset wraps around

         long  I                        no match: 16-bit index regs again
         lda   #-1                      result is (size_t)(-1)
         sta   s+2
         sta   s
         lda   #EILSEQ                  errno = EILSEQ
         sta   >errno
         bra   exit

found    longi off                      (index registers are 8-bit here)
         txa                            character = table offset / 2,
         lsr   a                          with the $80 bit put back
         ora   #$0080
emit     short M                        write one byte to *s
         sta   [s]
         long  M,I
one_byte stz   s+2                      result is 1
         lda   #1
         sta   s
exit     creturn 4:s
         end
****************************************************************
*
* size_t c32rtomb(char * s, char32_t c32, mbstate_t * ps);
*
* Convert a UTF-32 code unit to a multibyte character.
*
* Inputs:
* s - pointer to output location
* c32 - UTF-32 code unit
* ps - conversion state
*
* Outputs:
* *s - converted character
* Returns number of bytes stored, or -1 for error.
*
****************************************************************
*
c32rtomb start
; No stack frame is built.  The arguments are rewritten in place into the
; layout c16rtomb takes (a 2-byte code unit instead of a 4-byte one) and
; control is passed with jml, so c16rtomb returns straight to the caller
; of this routine.
;
; Stack offsets on entry, as used below:
;    1,s-3,s    return address
;    4,s 6,s    s (low word, high word)
;    8,s 10,s   the 32-bit code unit (low word, high word)
;
; A nonzero high word is replaced by $FFFD.  That value has no entry in
; macRomanToUCS, so c16rtomb takes its no-mapping (EILSEQ) path for it.
lda 10,s if char is outside the BMP
beq fixstack
lda #$FFFD substitute REPLACEMENT CHARACTER
bra fs2
fixstack lda 8,s adjust stack for call to c16rtomb
; The chosen 16-bit value goes where the high word of the code unit was;
; that slot is the c16 argument once everything below it has moved up by 2.
fs2 sta 10,s
; Move s and the return address up two bytes, highest word first, so that
; no word is overwritten before it has been copied.
lda 6,s
sta 8,s
lda 4,s
sta 6,s
lda 2,s
sta 4,s
; Pull the first two return-address bytes and store them two bytes higher;
; the pull also drops the two bytes freed by the narrower argument.
pla
sta 1,s
jml c16rtomb do the equivalent c16rtomb call
end
macRomanToUCS private
; 16-bit Unicode values for the Mac OS Roman characters $80-$FF: 128
; two-byte entries, 8 per line, in character-code order.  The entry for
; character c is at byte offset (c - $80) * 2.  mbrtoc16/mbrtoc32 index the
; table that way; c16rtomb searches it from the start for the reverse
; mapping, so entry order must not change.
dc i2'$00C4, $00C5, $00C7, $00C9, $00D1, $00D6, $00DC, $00E1'
dc i2'$00E0, $00E2, $00E4, $00E3, $00E5, $00E7, $00E9, $00E8'
dc i2'$00EA, $00EB, $00ED, $00EC, $00EE, $00EF, $00F1, $00F3'
dc i2'$00F2, $00F4, $00F6, $00F5, $00FA, $00F9, $00FB, $00FC'
dc i2'$2020, $00B0, $00A2, $00A3, $00A7, $2022, $00B6, $00DF'
dc i2'$00AE, $00A9, $2122, $00B4, $00A8, $2260, $00C6, $00D8'
dc i2'$221E, $00B1, $2264, $2265, $00A5, $00B5, $2202, $2211'
dc i2'$220F, $03C0, $222B, $00AA, $00BA, $03A9, $00E6, $00F8'
dc i2'$00BF, $00A1, $00AC, $221A, $0192, $2248, $2206, $00AB'
dc i2'$00BB, $2026, $00A0, $00C0, $00C3, $00D5, $0152, $0153'
dc i2'$2013, $2014, $201C, $201D, $2018, $2019, $00F7, $25CA'
dc i2'$00FF, $0178, $2044, $00A4, $2039, $203A, $FB01, $FB02'
dc i2'$2021, $00B7, $201A, $201E, $2030, $00C2, $00CA, $00C1'
dc i2'$00CB, $00C8, $00CD, $00CE, $00CF, $00CC, $00D3, $00D4'
dc i2'$F8FF, $00D2, $00DA, $00DB, $00D9, $0131, $02C6, $02DC'
dc i2'$00AF, $02D8, $02D9, $02DA, $00B8, $02DD, $02DB, $02C7'
end

133
uchar.macros Normal file
View File

@ -0,0 +1,133 @@
MACRO
&lab csubroutine &parms,&work
; Entry sequence for a routine that receives its arguments on the stack.
;   parms - parenthesized list of size:name items; size is one or two digits
;   work  - number of bytes of work space to reserve (taken as 0 if omitted)
; Each name is equated to totallen+4+work, where totallen is the combined
; size of the items before it.  The totals are kept in the global
; arithmetic symbols totallen and worklen, which creturn reads back.
&lab anop
aif c:&work,.a
lclc &work
&work setc 0
.a
gbla &totallen
gbla &worklen
&worklen seta &work
&totallen seta 0
aif c:&parms=0,.e
lclc &len
lclc &p
lcla &i
&i seta 1
; Loop over the items.  If the second character of an item is a colon the
; size is its first character and the name starts at the third; otherwise
; the size is the first two characters and the name starts at the fourth.
.b
&p setc &parms(&i)
&len amid &p,2,1
aif "&len"=":",.c
&len amid &p,1,2
&p amid &p,4,l:&p-3
ago .d
.c
&len amid &p,1,1
&p amid &p,3,l:&p-2
.d
&p equ &totallen+4+&work
&totallen seta &totallen+&len
&i seta &i+1
aif &i<=c:&parms,^b
.e
; Generated code: copy S to A, lower the stack by the work size when it is
; nonzero, push the current direct page register, then load the direct page
; register from A so the names equated above address the arguments.
tsc
aif &work=0,.f
sec
sbc #&work
tcs
.f
phd
tcd
mend
MACRO
&lab creturn &r
; Exit sequence paired with csubroutine; it relies on the totallen and
; worklen symbols set there.
;   r - optional size:name of the value to return; size must be 2, 4 or 10
;         2  - Y is loaded from name
;         4  - Y is loaded from name and X from name+2
;         10 - Y and X are loaded with immediate operands formed from the
;              name itself rather than from its contents
;       Any other size produces the "Not a valid return length" error.
;       With no operand no registers are loaded and the final tya is omitted.
&lab anop
lclc &len
aif c:&r,.a
lclc &r
&r setc 0
&len setc 0
ago .h
.a
&len amid &r,2,1
aif "&len"=":",.b
&len amid &r,1,2
&r amid &r,4,l:&r-3
ago .c
.b
&len amid &r,1,1
&r amid &r,3,l:&r-2
.c
aif &len<>2,.d
ldy &r
ago .h
.d
aif &len<>4,.e
ldx &r+2
ldy &r
ago .h
.e
aif &len<>10,.g
ldy #&r
ldx #^&r
ago .h
.g
mnote 'Not a valid return length',16
mexit
.h
; When there are arguments, copy the three bytes at worklen+1..worklen+3
; up by totallen.  After the stack adjustment below these are the bytes
; that rtl pulls, so the routine returns with its arguments removed.
aif &totallen=0,.i
lda &worklen+2
sta &worklen+&totallen+2
lda &worklen+1
sta &worklen+&totallen+1
.i
; Restore the saved direct page register, then raise S past the work area
; and the arguments.
pld
tsc
clc
adc #&worklen+&totallen
tcs
aif &len=0,.j
; The value loaded into Y above is copied to A just before returning.
tya
.j
rtl
mend
macro
&l long &a,&b
; Emits a rep instruction whose operand has $20 set for an M operand and
; $10 set for an I operand, and issues the matching longa on / longi on
; directives so the assembler tracks the new register widths.
; One or two operands may be given; only the first character of each is
; examined, and either case is accepted.
lclb &i
lclb &m
&a amid &a,1,1
&m setb ("&a"="M").or.("&a"="m")
&i setb ("&a"="I").or.("&a"="i")
aif c:&b=0,.a
&b amid &b,1,1
&m setb ("&b"="M").or.("&b"="m").or.&m
&i setb ("&b"="I").or.("&b"="i").or.&i
.a
&l rep #&m*32+&i*16
aif .not.&m,.b
longa on
.b
aif .not.&i,.c
longi on
.c
mend
macro
&l short &a,&b
; Emits a sep instruction whose operand has $20 set for an M operand and
; $10 set for an I operand, and issues the matching longa off / longi off
; directives so the assembler tracks the new register widths.
; One or two operands may be given; only the first character of each is
; examined, and either case is accepted.
lclb &i
lclb &m
&a amid &a,1,1
&m setb ("&a"="M").or.("&a"="m")
&i setb ("&a"="I").or.("&a"="i")
aif c:&b=0,.a
&b amid &b,1,1
&m setb ("&b"="M").or.("&b"="m").or.&m
&i setb ("&b"="I").or.("&b"="i").or.&i
.a
&l sep #&m*32+&i*16
aif .not.&m,.b
longa off
.b
aif .not.&i,.c
longi off
.c
mend