chiptune_ update krw decode

lz4 is taking 0.6s!
This commit is contained in:
Vince Weaver 2018-02-23 01:03:18 -05:00
parent f3b7a21ce2
commit b6e4b5ac8c

View File

@ -8,13 +8,14 @@ static unsigned char input[MAX_INPUT];
// Address in ram[] used as the base of the packed LZ4 data
// (pakoff is derived from it as LZ4_BUFFER+11 in lz4_decode()).
#define LZ4_BUFFER 0x2000
// Address in ram[] where unpacked output starts; lz4_decode() loads it
// into the destination pointer and subtracts it to compute the output size.
#define ORGOFFSET 0x6000
// Two-byte pointer/scratch slots in ram[], low byte first.
// NOTE(review): these lowercase names and the LZ4_*/COUNT/DELTA names
// below have identical values, so they alias the same slots.  The
// lowercase spellings are very collision-prone as macros (src, end, count).
#define src 0x0
#define dst 0x2
#define end 0x4
#define count 0x6
#define delta 0x8
// Upper-case spellings of the same slots: source pointer, destination
// pointer, end-of-packed-data pointer, copy count, and match delta.
#define LZ4_SRC 0x00
#define LZ4_DST 0x02
#define LZ4_END 0x04
#define COUNT 0x06
#define DELTA 0x08
// Not referenced in the visible part of this file.
// NOTE(review): presumably cursor column/row locations -- confirm.
#define CH 0x24
#define CV 0x25
@ -22,249 +23,221 @@ static unsigned char input[MAX_INPUT];
// Adjacent one-byte slots; not referenced in the visible part of this file.
// NOTE(review): presumably low/high bytes of an output pointer -- confirm.
#define OUTL 0xFE
#define OUTH 0xFF
// Running total of emulated 6502 cycles.  lz4_decode() resets it to 0,
// every modelled instruction adds its cost, and the total is printed
// both raw and divided by 1023000.0 to give seconds.
static int cycles;
/*
 * getsrc: C model of the 6502 "getsrc" routine.
 *
 * Loads the byte at (LZ4_SRC),y into the emulated accumulator `a`, then
 * advances the two-byte pointer stored at ram[LZ4_SRC] (low byte) and
 * ram[LZ4_SRC+1] (high byte) by one.
 *
 * Every modelled instruction adds its cost to the global `cycles`
 * counter; the closing rts is charged here, the jsr is charged by the
 * caller.
 */
static void getsrc(void) {
	//getsrc:
	a=ram[y_indirect(LZ4_SRC,y)];	cycles+=5;	// lda (LZ4_SRC), y
	ram[LZ4_SRC]++;			cycles+=5;	// inc LZ4_SRC
	cycles+=2;					// bne + (base cost)
	if (ram[LZ4_SRC]!=0) {
		cycles+=1;				// extra cycle: branch taken
		goto done_getsrc;			// bne +
	}
	// low byte rolled over to 0, so carry into the high byte
	ram[LZ4_SRC+1]++;		cycles+=5;	// inc LZ4_SRC+1
done_getsrc:
	cycles+=6;					//+ rts
}
/*
 * buildcount: C model of the 6502 "buildcount" routine.
 *
 * On entry `a` holds a 4-bit length taken from the token.  If it is not
 * 15 the routine returns at once with `a` unchanged.  If it is 15, more
 * length bytes are pulled from the source with getsrc() and summed into
 * `a`; each carry out of that sum bumps ram[COUNT+1].  The loop repeats
 * for as long as the byte just read was 255 (detected by x wrapping to 0
 * after inx -- assumes x is an 8-bit value, TODO confirm).
 *
 * ram[COUNT+1] is seeded with 1 rather than 0: docopy() decrements it
 * and stops when it reaches zero.
 *
 * Every modelled instruction adds its cost to the global `cycles`
 * counter; the closing rts is charged here, the jsr is charged by the
 * caller.
 */
void buildcount(void) {
	//buildcount:
	x=1;			cycles+=2;	// ldx #1
	ram[COUNT+1]=x;		cycles+=3;	// stx COUNT+1
	cmp(0xf);		cycles+=2;	// if 15, more complicated // cmp #$0f
	cycles+=2;				// bne ++ (base cost)
	if (z==0) {
		cycles+=1;			// extra cycle: branch taken
		goto done_buildcount;		// otherwise A is count // bne ++
	}
buildcount_loop:
	ram[COUNT]=a;		cycles+=3;	//- sta COUNT
	getsrc();		cycles+=6;	// jsr getsrc
	x=a;			cycles+=2;	// tax (remember the raw byte)
	c=0;			cycles+=2;	// clc
	adc(ram[COUNT]);	cycles+=3;	// adc COUNT
	cycles+=2;				// bcc + (base cost)
	if (c==0) {
		cycles+=1;			// extra cycle: branch taken
		goto skip_buildcount;		// bcc +
	}
	ram[COUNT+1]++;		cycles+=5;	// inc COUNT+1
skip_buildcount:
	x++;			cycles+=2;	// check if x is 255 //+ inx
	cycles+=2;				// beq - (base cost, as for every other branch)
	if (x==0) {
		cycles+=1;			// extra cycle: branch taken
		goto buildcount_loop;		// if so, add in next byte // beq -
	}
done_buildcount:
	cycles+=6;				//++ rts
}
/*
 * putdst: C model of the 6502 "putdst" routine.
 *
 * Stores the emulated accumulator `a` at (LZ4_DST),y, then advances the
 * two-byte pointer stored at ram[LZ4_DST] (low byte) and ram[LZ4_DST+1]
 * (high byte) by one.
 *
 * Every modelled instruction adds its cost to the global `cycles`
 * counter, including the closing rts.
 */
static void putdst(void) {
	// putdst:
	ram[y_indirect(LZ4_DST,y)]=a;	cycles+=6;	// sta (LZ4_DST), y
	ram[LZ4_DST]++;			cycles+=5;	// inc LZ4_DST
	cycles+=2;					// bne + (base cost)
	if (ram[LZ4_DST]!=0) {
		cycles+=1;				// extra cycle: branch taken
		goto putdst_end;			// bne +
	}
	// low byte rolled over to 0, so carry into the high byte
	ram[LZ4_DST+1]++;		cycles+=5;	// inc LZ4_DST+1
putdst_end:
	cycles+=6;					//+ rts
}
// getput: copy one byte from the source pointer to the destination pointer.
// getsrc() loads the byte into `a` and advances LZ4_SRC; putdst() stores
// `a` and advances LZ4_DST.
static void getput(void) {
// getput:
// NOTE(review): debug trace written to stdout on every byte copied --
// confirm it is still wanted.
printf("GP ");
getsrc(); // jsr getsrc
// cost of the jsr above; putdst is reached by fallthrough in the
// assembly, so no jsr is charged for it
cycles+=6;
putdst(); // ; fallthrough
}
// 202 -> 201 -> 100 -> 1FF
// 201 -> 100 -> 1FF
// 200 -> 2ff -> 2fe
// 1ff -> 1fe
/*
 * docopy: C model of the 6502 "docopy" routine.
 *
 * Calls getput() repeatedly to copy bytes from LZ4_SRC to LZ4_DST.
 * `x` is the inner counter: it is decremented after every byte, and
 * each time it reaches zero ram[COUNT+1] is decremented as well.  The
 * routine returns once ram[COUNT+1] reaches zero.
 *
 * Every modelled instruction adds its cost to the global `cycles`
 * counter; the closing rts is charged here, the jsr is charged by the
 * caller.
 */
static void docopy(void) {
	// docopy:
docopy_label:
	getput();		cycles+=6;	// jsr getput
	x--;			cycles+=2;	// dex
	cycles+=2;				// bne docopy (base cost)
	if (x!=0) {
		cycles+=1;			// extra cycle: branch taken
		goto docopy_label;		// bne docopy
	}
	ram[COUNT+1]--;		cycles+=5;	// dec COUNT+1
	cycles+=2;				// bne docopy (base cost)
	if (ram[COUNT+1]!=0) {
		cycles++;			// extra cycle: branch taken
		goto docopy_label;		// bne docopy
	}
	cycles+=6;				// rts
}
#define orgoff 0x6000
int lz4_decode(void) {
FILE *fff;
int size;
short orgoff,paksize,pakoff;
cycles=0;
//LZ4 data decompressor for Apple II
//Peter Ferrie (peter.ferrie@gmail.com)
// lz4_decode:
//init = 0 ;set to 1 if you know the values
//hiunp = 0 ;unpacker entirely in high memory
//hipak = 0 ;packed data entirely in high memory (requires hiunp)
a=ram[LZ4_SRC]; cycles+=3; // lda LZ4_SRC
c=0; cycles+=2; // clc
adc(ram[LZ4_END]); cycles+=3; // adc LZ4_END
ram[LZ4_END]=a; cycles+=3; // sta LZ4_END
a=ram[LZ4_SRC+1]; cycles+=3; // lda LZ4_SRC+1
adc(ram[LZ4_END+1]); cycles+=3; // adc LZ4_END+1
ram[LZ4_END+1]=a; cycles+=3; // sta LZ4_END+1
//oep = 0; //first unpacked byte to run, you must set this by yourself
orgoff = ORGOFFSET; //offset of first unpacked byte, you must set this by yourself
paksize = size-0xb-8;
// minus 4 for checksum at end
// not sure what other 4 is from?
// block checksum? though had that disabled?
a=high(orgoff); cycles+=2; // lda #>orgoff ; original unpacked data offset
ram[LZ4_DST+1]=a; cycles+=3; // sta LZ4_DST+1
a=low(orgoff); cycles+=2; // lda #<orgoff
ram[LZ4_DST]=a; cycles+=3; // sta LZ4_DST
//size of packed data, you must set this by yourself if hiunp=0
pakoff = LZ4_BUFFER+11; // 11 byte offset to data?
//LCBANK2 = $c083
//MOVE = $fe2c
a=(pakoff&0xff); //lda #<pakoff ;packed data offset
ram[src]=a; //sta src
a=(pakoff+paksize)&0xff;//lda #<(pakoff+paksize) ;packed data size
ram[end]=a; // sta end
a=(pakoff>>8); //lda #>pakoff
ram[src+1]=a; //sta src+1
a=(pakoff+paksize)>>8; //lda #>(pakoff+paksize)
ram[end+1]=a; // sta end+1
a=(orgoff>>8); //lda #>orgoff ;original unpacked data offset
ram[dst+1]=a; //sta dst+1
a=(orgoff&0xff); //lda #<orgoff
ram[dst]=a; // sta dst
printf("packed size: raw=%x, adj=%x\n",size,paksize);
printf("packed addr: %02X%02X\n",ram[src+1],ram[src]);
// printf("packed size: raw=%x, adj=%x\n",size,paksize);
printf("packed addr: %02X%02X\n",ram[LZ4_SRC+1],ram[LZ4_SRC]);
printf("packed end : %02X%02X\n",ram[end+1],ram[end]);
printf("dest addr : %02X%02X\n",ram[dst+1],ram[dst]);
printf("dest addr : %02X%02X\n",ram[LZ4_DST+1],ram[LZ4_DST]);
// https://github.com/lz4/lz4/wiki/lz4_Frame_format.md
// Should: check for magic number 04 22 4d 18
// FLG: 64 in our case (01=version, block.index=1, block.checksum=0
// size=0, checksum=1, reserved
// MAX Blocksize: 40 (64kB)
// HEADER CHECKSUM: a7
// BLOCK HEADER: 4 bytes (le) If highest bit set, uncompressed!
//unpack: //;unpacker entrypoint
// goto unpmain; // jmp unpmain
//unpack:
y=0; // used for offset //ldy #0
//unpmain:
y=0; cycles+=2; // used for offset //ldy #0
parsetoken:
printf("LOAD TOKEN: ");
getsrc(); // jsr getsrc
getsrc(); cycles+=6; // jsr getsrc
// get token
pha(); // save for later // pha
lsr(); // num literals in top 4 // lsr
lsr(); // lsr
lsr(); // lsr
lsr(); // lsr
if (a==0) goto copymatches; // if zero, no literals // beq copymatches
pha(); cycles+=3; // save for later // pha
lsr(); cycles+=2; // num literals in top 4// lsr
lsr(); cycles+=2; // lsr
lsr(); cycles+=2; // lsr
lsr(); cycles+=2; // lsr
cycles+=2;
if (a==0) {
cycles+=1;
goto copymatches; // if zero, no literals // beq copymatches
}
buildcount(); // otherwise, build the count // jsr buildcount
buildcount(); cycles+=6; // otherwise, build the count // jsr buildcount
x=a; // tax
docopy(); // jsr docopy
a=ram[src]; // lda src
cmp(ram[end]); // cmp end
a=ram[src+1]; // lda src+1
x=a; cycles+=2; // tax
docopy(); cycles+=6; // jsr docopy
a=ram[LZ4_SRC]; cycles+=3; // lda LZ4_SRC
cmp(ram[end]); cycles+=3; // cmp end
a=ram[LZ4_SRC+1]; cycles+=3; // lda LZ4_SRC+1
sbc(ram[end+1]); // sbc end+1
sbc(ram[end+1]); cycles+=3; // sbc end+1
cycles+=2;
if (c) {
printf("Done!\n");
printf("src : %02X%02X\n",ram[src+1],ram[src]);
printf("src : %02X%02X\n",ram[LZ4_SRC+1],ram[LZ4_SRC]);
printf("packed end : %02X%02X\n",ram[end+1],ram[end]);
cycles+=1;
goto done; // bcs done
}
copymatches:
printf("\tDELTAL ");
getsrc(); // jsr getsrc
ram[delta]=a; // sta delta
printf("\tDELTAH ");
getsrc(); // jsr getsrc
ram[delta+1]=a; // sta delta+1
printf("\tDELTA is %02X%02X\n",ram[delta+1],ram[delta]);
pla(); // restore token // pla
a=a&0xf; // get bottom 4 bits // and #$0f
buildcount(); // jsr buildcount
getsrc(); cycles+=6; // jsr getsrc
ram[DELTA]=a; cycles+=3; // sta DELTA
getsrc(); cycles+=6; // jsr getsrc
ram[DELTA+1]=a; cycles+=3; // sta DELTA+1
pla(); cycles+=3; // restore token // pla
a=a&0xf; cycles+=2; // get bottom 4 bits // and #$0f
buildcount(); cycles+=6; // jsr buildcount
c=0; // clc
adc(4); // adc #4
x=a; // tax
if (x==0) goto copy_skip; //BUGFIX // beq +
if (c==0) goto copy_skip; // bcc +
ram[count+1]++; // inc count+1
c=0; cycles+=2; // clc
adc(4); cycles+=2; // adc #4
x=a; cycles+=2; // tax
cycles+=2;
if (x==0) {
cycles+=1;
goto copy_skip; //BUGFIX // beq +
}
cycles+=2;
if (c==0) {
cycles+=1;
goto copy_skip; // bcc +
}
ram[COUNT+1]++; cycles+=5; // inc count+1
copy_skip:
a=ram[src+1]; //+ lda src+1
pha(); // pha
a=ram[src]; // lda src
pha(); // pha
printf("\tSAVED SRC: %02X%02X\n",ram[src+1],ram[src]);
// printf("CALCULATING: DST %02X%02X - DELTA %02X%02X\n",ram[dst+1],ram[dst],ram[delta+1],ram[delta]);
c=1; // sec
a=ram[dst]; // lda dst
sbc(ram[delta]); // sbc delta
ram[src]=a; // sta src
a=ram[dst+1]; // lda dst+1
sbc(ram[delta+1]); // sbc delta+1
ram[src+1]=a; // sta src+1
printf("\tNEW SRC: %02X%02X\n",ram[src+1],ram[src]);
docopy(); // jsr docopy
pla(); // pla
ram[src]=a; // sta src
pla(); // pla
ram[src+1]=a; // sta src+1
printf("\tRESTORED SRC: %02X%02X\n",ram[src+1],ram[src]);
a=ram[LZ4_SRC+1]; cycles+=3; //+ lda src+1
pha(); cycles+=3; // pha
a=ram[LZ4_SRC]; cycles+=3; // lda src
pha(); cycles+=3; // pha
c=1; cycles+=2; // sec
a=ram[LZ4_DST]; cycles+=3; // lda LZ4_DST
sbc(ram[DELTA]); cycles+=3; // sbc DELTA
ram[LZ4_SRC]=a; cycles+=3; // sta LZ4_SRC
a=ram[LZ4_DST+1]; cycles+=3; // lda LZ4_DST+1
sbc(ram[DELTA+1]); cycles+=3; // sbc DELTA+1
ram[LZ4_SRC+1]=a; cycles+=3; // sta LZ4_SRC+1
docopy(); cycles+=6; // jsr docopy
pla(); cycles+=3; // pla
ram[LZ4_SRC]=a; cycles+=3; // sta LZ4_SRC
pla(); cycles+=3; // pla
ram[LZ4_SRC+1]=a; cycles+=3; // sta LZ4_SRC+1
cycles+=3;
goto parsetoken; // jmp parsetoken
done:
cycles+=3;
pla(); // pla
cycles+=6; // rts
int out_size=(ram[dst+1]<<8)+ram[dst];
int out_size=(ram[LZ4_DST+1]<<8)+ram[LZ4_DST];
out_size-=ORGOFFSET;
printf("dest addr : %02X%02X\n",ram[dst+1],ram[dst]);
int i,j,addr,temp;
addr=ORGOFFSET;
printf("\n");
for(i=0;i<256;i++) {
if (i%16==0) printf("%04X: ",addr+i);
printf("%02X ",ram[addr+i]);
if (i%16==15) {
for(j=0;j<16;j++) {
temp=ram[((addr+i)&0xfff0)+j];
if ((temp<' ') || (temp>127)) printf(".");
else printf("%c",temp);
}
printf("\n");
}
}
printf("\n");
// rts
printf("dest addr : %02X%02X\n",ram[LZ4_DST+1],ram[LZ4_DST]);
fff=fopen("out.out","w");
if (fff==NULL) {
@ -278,6 +251,9 @@ done:
fclose(fff);
printf("Cycles: %d\t %lfs\n",cycles,(double)cycles/1023000.0);
printf("\t50Hz\t %lfs\n",(double)1/50.0);
return 0;
}
@ -362,6 +338,40 @@ int main(int argc, char **argv) {
a=23;
print_header_info();
y=0;
a=high(LZ4_BUFFER+3);
ram[LZ4_SRC+1]=a;
a=low(LZ4_BUFFER+3);
ram[LZ4_SRC]=a;
a=ram[y_indirect(LZ4_SRC,y)];
c=0;
adc(ram[LZ4_SRC]);
ram[LZ4_SRC]=a;
a=ram[LZ4_SRC+1];
adc(0);
ram[LZ4_SRC+1]=a;
// next_subsong:
y=0;
a=ram[y_indirect(LZ4_SRC,y)];
ram[LZ4_END]=a;
y++;
a=ram[y_indirect(LZ4_SRC,y)];
ram[LZ4_END+1]=a;
y++;
a=2;
c=0;
adc(ram[LZ4_SRC]);
ram[LZ4_SRC]=a;
a=(ram[LZ4_SRC+1]);
adc(0);
ram[LZ4_SRC+1]=a;
lz4_decode();
return 0;
}