From 62391aa514ccf9967fc6eb90e39882047ff97f77 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Sun, 25 Feb 2018 01:08:14 -0500 Subject: [PATCH] chiptune_player: implemented stepwise lz4 better, but still not acceptable --- asm_routines/lz4_decode_step.s | 219 ++++++++++++++++++++++++++++ chiptune_player/Makefile | 2 +- chiptune_player/TODO | 2 + chiptune_player/chiptune_player.dsk | Bin 143360 -> 143360 bytes chiptune_player/chiptune_player.s | 32 +++- chiptune_player/interrupt_handler.s | 28 +++- chiptune_player/zp.inc | 12 +- gr-sim/lz4d_verbose.c | 6 +- 8 files changed, 279 insertions(+), 22 deletions(-) create mode 100644 asm_routines/lz4_decode_step.s diff --git a/asm_routines/lz4_decode_step.s b/asm_routines/lz4_decode_step.s new file mode 100644 index 00000000..a0a30b06 --- /dev/null +++ b/asm_routines/lz4_decode_step.s @@ -0,0 +1,219 @@ +; Stepwise LZ4 data decompressor for Apple II +; Only decodes X tokens before returning + +; Code by Peter Ferrie (qkumba) (peter.ferrie@gmail.com) +; "LZ4 unpacker in 143 bytes (6502 version) (2013)" +; http://pferrie.host22.com/misc/appleii.htm +; This is that code, but with comments and labels added for clarity. +; I also found a bug when decoding with runs of multiples of 256 +; which has since been fixed upstream. + +; For LZ4 reference see +; https://github.com/lz4/lz4/wiki/lz4_Frame_format.md + +; LZ4 summary: +; +; HEADER: +; Should: check for magic number 04 22 4d 18 +; FLG: 64 in our case (01=version, block.index=1, block.checksum=0 +; size=0, checksum=1, reserved +; MAX Blocksize: 40 (64kB) +; HEADER CHECKSUM: a7 +; BLOCK HEADER: 4 bytes (le) If highest bit set, uncompressed! +; BLOCKS: +; Token byte. High 4-bits literal length, low 4-bits copy length +; + If literal length==15, then following byte gets added to length +; If that byte was 255, then keep adding bytes until not 255 +; + The literal bytes follow. There may be zero of them +; + Next is block copy info. little-endian 2-byte offset to +; be subtracted from current read position indicating source +; + The low 4-bits of the token are the copy length, which needs +; 4 added to it. As with the literal length, if it is 15 then +; you read a byte and add (and if that byte is 255, keep adding) + +;LZ4_SRC EQU $00 +;LZ4_DST EQU $02 +;LZ4_END EQU $04 +;COUNT EQU $06 +;DELTA EQU $08 + +;UNPACK_BUFFER EQU $5E00 ; offset of first unpacked byte + + + ;====================== + ; LZ4 decode + ;====================== + ; input buffer in LZ4_SRC + ; output buffer hardcoded still + ; size in ENDH:ENDL + +lz4_decode_setup: + + lda LZ4_SRC ; packed data offset + clc + adc LZ4_END + sta LZ4_END + lda LZ4_SRC+1 + adc LZ4_END+1 + sta LZ4_END+1 + + lda #>UNPACK_BUFFER ; original unpacked data offset + sta LZ4_DST+1 + lda #acz-El0 zkj`K-_&ju`N})&Q3~gpq2ZbDyrhth(nIuy~ptVT?ngsIICUGXgL*mAxgb>mIad%}P zowk2eH+t{w+qduc?!C8ryKBn0Ys#4Pj<`>MWI2A+O%Gq-ROtoIM)R~lixfctiliut zrVJFLa?2rSWvxc5$!fM*tegtWC(0VbtCtyS!oG*wVN)m^YK7J9_qDc#>-?c+7+MMa zbuPk+RB$A4@;!3+Gfw^EM};GW>CdQw$oS$_+f(C<(G!!%8aCVhw9DA(t!#T`~qv&b9` zgS&@!4h{^zIj97SZL0KLTmN?3Ffq7$a9~hLwhefb975*sN<#6XlDMp?-AJI2@S|qn zzck8!YV@{S%Ta*jWMPI1-juN}$?5q@gcAdf8+@6~)%Pej zyH<>R{9Og9O6Ee2F)}8`%vczX;Z<(a11P1>58t3w>2rRBpSnVgxeee86Ebz%Sqsat zJS(swLok3L8H%A91H+&tA2x6wSm2r)aQStFKC2Wd#fqw2QTFS9M(v-g%t2LvE#ef& zNcyMqn(1Ps3!dcQUj_J%0P{uta^eR{x2b^)?7uGQv@#3(MBSj2=y^&h@>+&e<#4+M zPjhH=Ltm@hTmz9I##;x6M99kUA2M7krPNGjJ9bDYMetW5f-ly;ycNcA42dT8D%+JE z%Ht?eI7uj3*bv!gjlW-d$67y#ka*=LH0EH7LS+tF{g6a)!k_bcE+z{QLHoQ={lizX z7tP+u{d`ey$*!XfdW=x$2}BG!X^#;Kkpgi-0qXg$9R0@yyB>}0#82Y4yuJsqr}$yw zRYH22kS?dMMH`wU;m{HfI+}=Dzb{gMTWks~%_>*>R{66+YpuU2G%vNZMVcELvvN38 z9g1Y7V0+j%zqk>dZCO=Avzqzp*H(P)6e(pdwcul?JPP{-^~tY`OsbGAges^nf4!ER z5!8Y!#pGg9t-f;iN|f?X#q4xeilTR)y{-9F#6zz=H5;cBXT-#W7zc^h#l$(xFN%rx zF#nsF_()X8uH41n5EFA)LrMvQq|RO`s3@54fC4Ex@1s^q)Je!9ASIeGZ;=wMn6Hu& z?UK6un`(AOLgk*3)c$Ysm>KB^7Jv6m`(gJNKpJy>0A>uRiCt>`pH?s|%Nkh|YTFm) zPy1|6;mS}LHhBV`kOv+4FwoXq=L?rsEt@~>#{!>#_l$>KXMyyh8+;)kUkfz%B$zww zzJ&c>1T$vr-^6n~&kMZBsh07dY2gC$Sn)E${|i7JcAWvzeW=cLLSs*F&$?K=JH8=K z5cq==>Fs!AL#z|7+W}X_yB~?c)v=Ckv0nJVHazll#ya|9V}TdJDIAETa@QZ!+#(n) zN8Xkjs{KJW( za|H1di2x{Hg*g-&%>CPc7t$Uao09;?k&>wK4&Wk1i{?e#$Xn#Z6I>>U+HhBjAHBbB=+IJIy$ zy02``Fem`CkjLb)dB!|bo;lBw$7$R~P>8$ctOcRkZMSba+eg`FzU&0YJ2)KnL)u*% zK@n*eH10`oZoa>Ku*DaS;0scu-VfWF8&>(lt$tYL@m6}Omf!Z%Jqm_fB$eq|GqPsI zbK-Gtg+b$<0oRN_Lj82qI0^yBW^uR^AP)s(83*Mk6}RqYwPC-n$sdL#xArIot!{|G z)uC{(_I4+iflbwg}N#bYtFvl_uEh`g0lJB?10)9kc3IVbNFoFXJ3fFz_K z4GoZiEHpwBG(!vIAP)s7x(F9)Hy7oiT?QB9VqHdzT8B_027l0B6A3reKzxM5`Cr*m zSW{E#^?J(*3P<4g!MmZaIpPcZTcIcW__nU!x}^`7*E<&2gaTs;f^eB#7L6+)i;(*_ z>XG|41Ikml9oE!`8(JexzE&6sK|iWYdv&O`9l8kemitG0dp6xc=;?;-zGiq&RaF&q sxm;s`-+)sxNf6qS0`eSAzyjpDCZBt+}x|w7OXf90H&J3yK-0KL7v# delta 1967 zcmZWqeQX@X6`$GNyS?3;`|$4G-rDin6LWUrg2S~nZ4e~5kV2X`gwR+`1&TH%re^_C z)&5Z+A4bBHpt^@*4`rK5sLM4Mvkv0JPO=K5f+LvPCOFsN5JhQyMhGjSm2(nOQtTwx zS*K~LcBOl7-tT?<-oBZ=d2S~3+)T(xd!<)kOMu4Z62XdAqn4~ijC z5@m{>HBe9thJsR14u*q>B}@!te&QLALs#@(+zMCn-il3d4Ea^ouZD+W-t?^y!D^Y4 zd0CJHa!?lKkSxiv9F~zBk?E%MZ?dLu^Ak3tp@*Upkf6j#yd+2gDM-=FZH(pM$n;$e z!ch#T!(njqp^5XWVA#en=&=GL-`(&DH=Ldh=N=VuZ7BDoK-=zHKeGOzBjI%*eo4k# zuHf%2xPZ)&6lo-sd`(_*&y#DeK`MB!g4d`supD$fwx}NT!GBtjOs2WO>Q+=#;Pu7P&0l~!w*$4>xyfUt6MM&^!YmRZ&W<5<5Cho ztmSFB{Ip*XkQCmfx(U+gN{qCcwQI0y+^<-J zRpctZqWJ-sR9Mh8hj%r6vdT>`@qIVRqyv}_x;z1I1EUKNkcZuXK!=$`7-J&fa{qeUX}0h$75e*KDQT~=IJ~_I{qwG^TuGtJb~kcZ zr4R1D{eCv4v+AOIab>Z#=58QoRo_e-`MGW(drZw9SG!^MJvIA*ugBHwNnf8*v!_*h zV)wTqWi|V?k6co-->7tc_mY;_^<9f;*7X`Iwd`FQi&(8?f8gsiTJ`~7uhX&{Gd-|!LqLYW_9yfV{mZsQPeC6tx`O+-S@A--Y_afuZk8)@ zv%H1B&?k!XABn|Fmcvfa{=Vc3@p*UdKquaubRYM{Nnfn-#ov6f-xu%tqKXMeJ=T{j z$4UBkYh`CyZwu=+tY=<(A%5XN=h*vgMb><8r=6i5_-eK|+b z!fH7Zj-W^+5{<-Ir?2NS!pQWm8v?^ezLGo?gDDuo5P)0a05W$Q?4gBc?g>@O08}af z*q`U%t?d6#^D1$O5Vu6IBPLwR)dD-sbf^vpQ==!wChaN{R@=WY;eIX{u_JY`UT6k< z&`#H}AVm~D94$20&?E;I}p{_O{-vQ zR1fGuUDQLmq|17kO>5W>{q|?@2)1jwU@eyn+h_+&UH5G9I{pa{WBWTh;3O;Cy?vyZYG{P6c|49_ki7S2HwWii#EE<-UP~Tu+~QIus;svM#H&2w%a9X bWabNuZiL6HxfIvHm{)L(UNPACK_BUFFER+$2A00) ; in proper chunk (1 of 3) ; 2 +update_r0_pointer: + sta INH ; update r0 pointer ; 3 ;============ ; 18 -update_r0_pointer: - sta INH ; update r0 pointer ; 3 ;================================= ; Finally done with this interrupt diff --git a/chiptune_player/zp.inc b/chiptune_player/zp.inc index f62ae018..3bdfda40 100644 --- a/chiptune_player/zp.inc +++ b/chiptune_player/zp.inc @@ -1,11 +1,10 @@ .define EQU = -LZ4_SRC EQU $00 -LZ4_DST EQU $02 -LZ4_END EQU $04 -COUNT EQU $06 -DELTA EQU $08 - +LZ4_SRC EQU $00 +LZ4_DST EQU $02 +LZ4_END EQU $04 +COUNT EQU $06 +DELTA EQU $08 ;; Zero page monitor routines addresses @@ -85,6 +84,7 @@ DECODER_STATE EQU $7F ;NUM1L EQU $7E ;NUM1H EQU $7F CHUNKSIZE EQU $80 +LZ4_DONE EQU $81 A_COLOR EQU $83 B_COLOR EQU $84 diff --git a/gr-sim/lz4d_verbose.c b/gr-sim/lz4d_verbose.c index fbd9ce59..114cbdfc 100644 --- a/gr-sim/lz4d_verbose.c +++ b/gr-sim/lz4d_verbose.c @@ -109,6 +109,7 @@ int main(int argc, char **argv) { FILE *fff; int size; short orgoff,paksize,pakoff; + int token_count=0; init_6502(); @@ -186,7 +187,8 @@ int main(int argc, char **argv) { y=0; // used for offset //ldy #0 parsetoken: - printf("LOAD TOKEN: "); + token_count++; + printf("LOAD TOKEN %d: ",token_count); getsrc(); // jsr getsrc // get token pha(); // save for later // pha @@ -290,6 +292,8 @@ done: printf("Out size=%d\n",out_size); + printf("Total tokens: %d\n",token_count); + fwrite(&ram[ORGOFFSET],1,out_size,fff); fclose(fff);