diff --git a/compiler/res/prog8lib/cx16/diskio.p8 b/compiler/res/prog8lib/cx16/diskio.p8 index dceec47b8..a3e9612c6 100644 --- a/compiler/res/prog8lib/cx16/diskio.p8 +++ b/compiler/res/prog8lib/cx16/diskio.p8 @@ -738,6 +738,24 @@ io_error: cbm.CLOSE(15) } + sub get_loadaddress(str filename) -> uword { + ; get the load address from a PRG file (usually $0801 but it can be different) + + cbm.SETNAM(strings.length(filename), filename) + cbm.SETLFS(READ_IO_CHANNEL, drivenumber, READ_IO_CHANNEL) + void cbm.OPEN() ; open 12,8,12,"filename" + cx16.r0 = 0 + if_cc { + cbm.CHKIN(READ_IO_CHANNEL) + cx16.r0L = cbm.CHRIN() + cx16.r0H = cbm.CHRIN() + if cbm.READST()!=0 + cx16.r0 = 0 + } + cbm.CLOSE(READ_IO_CHANNEL) + return cx16.r0 + } + ; CommanderX16 extensions over the basic C64/C128 diskio routines: diff --git a/compiler/res/prog8lib/shared_cbm_diskio.p8 b/compiler/res/prog8lib/shared_cbm_diskio.p8 index ae35e86cc..4905ed432 100644 --- a/compiler/res/prog8lib/shared_cbm_diskio.p8 +++ b/compiler/res/prog8lib/shared_cbm_diskio.p8 @@ -651,4 +651,22 @@ io_error: cbm.CLRCHN() cbm.CLOSE(15) } + + sub get_loadaddress(str filename) -> uword { + ; get the load address from a PRG file (usually $0801 but it can be different) + + cbm.SETNAM(strings.length(filename), filename) + cbm.SETLFS(READ_IO_CHANNEL, drivenumber, READ_IO_CHANNEL) + void cbm.OPEN() ; open 12,8,12,"filename" + cx16.r0 = 0 + if_cc { + cbm.CHKIN(READ_IO_CHANNEL) + cx16.r0L = cbm.CHRIN() + cx16.r0H = cbm.CHRIN() + if cbm.READST()!=0 + cx16.r0 = 0 + } + cbm.CLOSE(READ_IO_CHANNEL) + return cx16.r0 + } } diff --git a/compiler/res/prog8lib/shared_compression.p8 b/compiler/res/prog8lib/shared_compression.p8 index 90187f100..113dcb101 100644 --- a/compiler/res/prog8lib/shared_compression.p8 +++ b/compiler/res/prog8lib/shared_compression.p8 @@ -624,8 +624,6 @@ zx0_gamma_done: tax ; Preserve bit-buffer. 
; ; NOTE: for speed reasons this decompressor is NOT bank-aware and NOT I/O register aware; ; it only outputs to a memory buffer somewhere in the active 64 Kb address range - ; - ; TODO: include the in-place decompression version as well? %asm {{ @@ -859,6 +857,265 @@ lzput = cx16.r3 ; 2 bytes } + asmsub decode_tscrunch_inplace(uword compressed @R0) clobbers(A,X,Y) { + ; Decompress a block of data compressed by TSCRUNCH *in place* + ; This can save an extra memory buffer if you are reading crunched data from a file into a buffer. + ; see https://github.com/tonysavon/TSCrunch + ; It has extremely fast decompression (approaching RLE speeds), + ; better compression than RLE, but slightly worse compression ratio than LZSA + ; + ; NOTE: to allow in-place decompression you need to use -i switch when crunching. + ; also, both the input data file and compressed data file are PRG files with a load header! + ; NOTE: for speed reasons this decompressor is NOT bank-aware and NOT I/O register aware; + ; it only outputs to a memory buffer somewhere in the active 64 Kb address range + %asm {{ + +; NMOS 6502 decompressor for data stored in TSCrunch format. +; +; Copyright Antonio Savona 2022. +; Distributed under the Apache software License v2.0 https://www.apache.org/licenses/LICENSE-2.0 +; +; Adapted for Prog8 and 6502 CMOS by Irmen de Jong. + + + +.if cx16.r0 < $100 + ; r0-r15 registers are in zeropage just use those +tsget = cx16.r0 ; 2 bytes +tsput = cx16.r1 ; 2 bytes +tstemp = cx16.r2 +lzput = cx16.r3 ; 2 bytes +.else + .error "in decode_tscrunch: r0-15 are not in zeropage and no alternatives have been set up yet" ; TODO +.endif + + +.if cx16.r0>=$100 + ; set up the source and destination pointer + lda cx16.r0L + sta tsget + lda cx16.r0H + sta tsget+1 +.endif + + + ldy #$ff + - iny + lda (tsget),y + sta tsput , y ; last iteration trashes lzput, with no effect. 
+ cpy #3 + bne - + + pha + + lda lzput + sta optRun + 1 + + tya + ldy #0 + beq update_getonly + + entry2: + ; ILLEGAL lax (tsget),y + lda (tsget),y + tax + + bmi rleorlz + + cmp #$20 + bcs lz2 + + ; literal + + inc tsget + beq updatelit_hi + return_from_updatelit: + + ts_delit_loop: + + lda (tsget),y + sta (tsput),y + iny + dex + + bne ts_delit_loop + + tya + tax + ; carry is clear + ldy #0 + + updatezp_noclc: + adc tsput + sta tsput + bcs updateput_hi + putnoof: + txa + update_getonly: + adc tsget + sta tsget + bcc entry2 + inc tsget+1 + bcs entry2 + + updatelit_hi: + inc tsget+1 + bcc return_from_updatelit + updateput_hi: + inc tsput+1 + clc + bcc putnoof + + rleorlz: + + ; ILLEGAL: alr #$7f + and #$7f + lsr a + bcc ts_delz + + ; RLE + beq optRun + + plain: + ldx #2 + iny + sta tstemp ; number of bytes to de-rle + + lda (tsget),y ; fetch rle byte + ldy tstemp + runStart: + sta (tsput),y + + ts_derle_loop: + + dey + sta (tsput),y + + bne ts_derle_loop + + ; update zero page with a = runlen, x = 2 , y = 0 + lda tstemp + + bcs updatezp_noclc + + done: + pla + sta (tsput),y + rts + ; LZ2 + lz2: + beq done + + ora #$80 + adc tsput + sta lzput + lda tsput + 1 + sbc #$00 + sta lzput + 1 + + ; y already zero + lda (lzput),y + sta (tsput),y + iny + lda (lzput),y + sta (tsput),y + + tya + dey + + adc tsput + sta tsput + bcs lz2_put_hi + skp: + inc tsget + bne entry2 + inc tsget + 1 + bne entry2 + + lz2_put_hi: + inc tsput + 1 + bcs skp + + ; LZ + ts_delz: + + lsr a + sta lzto + 1 + + iny + + lda tsput + bcc long + + sbc (tsget),y + sta lzput + lda tsput+1 + + sbc #$00 + + ldx #2 + ; lz MUST decrunch forward + lz_put: + sta lzput+1 + + ldy #0 + + lda (lzput),y + sta (tsput),y + + iny + lda (lzput),y + sta (tsput),y + + ts_delz_loop: + + iny + + lda (lzput),y + sta (tsput),y + + lzto: cpy #0 + bne ts_delz_loop + + tya + + ; update zero page with a = runlen, x = 2, y = 0 + ldy #0 + ; clc not needed as we have len - 1 in A (from the encoder) and C = 1 + + jmp updatezp_noclc + 
+ optRun: + ldy #255 + sty tstemp + + ldx #1 + ; A is zero + + bne runStart + + long: + ; carry is clear and compensated for from the encoder + adc (tsget),y + sta lzput + iny + ; ILLEGAL lax (tsget),y + lda (tsget),y + tax + ora #$80 + adc tsput + 1 + + cpx #$80 + rol lzto + 1 + ldx #3 + + bne lz_put + + ; !notreached! + }} + } + + /*** ; prog8 source code for the RLE routines above: diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index 2498e1c39..f9d4fb762 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -324,6 +324,30 @@ API is slightly experimental and may change in a future version. **NOTE:** for speed reasons this decompressor is NOT bank-aware and NOT I/O register aware; it only outputs to a memory buffer somewhere in the active 64 Kb address range. +``decode_tscrunch_inplace (uword compressed)`` + Decompress a block of data compressed in the TSCrunch format *inplace*. + This can save an extra memory buffer if you are reading crunched data from a file into a buffer. + It has extremely fast decompression (approaching RLE speeds), + better compression than RLE, but slightly worse compression ratio than LZSA. + See https://github.com/tonysavon/TSCrunch for the compression format and compressor tool. + **NOTE:** for speed reasons this decompressor is NOT bank-aware and NOT I/O register aware; + it only outputs to a memory buffer somewhere in the active 64 Kb address range. + + .. note:: + The TSCrunch in-place format is a bit different than regular memory decompression. + It works with PRG files (so with a 2 byte load-address header) for both the *source* and *compressed* data files. + So if you want to compress and decompress a block of data from $a000-$c000 your source file has to start with + the bytes $00 $a0, then followed by the 8192 data bytes, for a total of 8194 bytes. + Then you need to call the compressor program with the '-i' argument to tell it to create an in-place compressed data file. 
+ The data file will *not* be loaded at $a000 but have its own load address closer to the end of the memory buffer. + If all is well, you can then load and decompress it like so:: + + uword tsi_start_addr = diskio.get_loadaddress("data8kb.tsi") + cx16.rambank(2) ; or whatever ram bank you want on the X16 + void diskio.load("data8kb.tsi", 0) ; not load_raw! + cx16.rambank(2) ; make sure the ram bank is still the same + compression.decode_tscrunch_inplace(tsi_start_addr) + ``decode_zx0 (uword compressed, uword target)`` Decompress a block of data compressed in the ZX0 format. This has faster decompression than LZSA, and a slightly better compression ratio as well. diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 630196738..1feee5384 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,6 +1,8 @@ TODO ==== +- diskio: if loading a hiram bank exactly fills the bank, then end address is reset to $a000 still and the bank is increased by 1. that should probably not happen + - DONE: make word arrays split by default and add new @nosplit tag to make an array use the old linear storage format - DONE: &splitarray will give you the start address of the lsb-array (which is immediately followed by the msb-array) - DONE: add &< and &> operators to get the address of the lsb-array and msb-array, respectively. (&< is just syntactic sugar for &) @@ -74,7 +76,6 @@ IR/VM Libraries --------- - monogfx: flood fill should be able to fill stippled -- Add in-place TSCrunch decoder routine as well to compression lib? May come in handy where you load a block of compressed data, decompress it in place in the same buffer/memory bank - Sorting module gnomesort_uw could be optimized more, rewrite in asm? Shellshort seems consistently faster even if most of the words are already sorted. - Add split-word array sorting routines to sorting module? - pet32 target: make syslib more complete (missing kernal routines)?