chiptune_ update krw decode

lz4 is taking 0.6s!
This commit is contained in:
Vince Weaver 2018-02-23 01:03:18 -05:00
parent f3b7a21ce2
commit b6e4b5ac8c

View File

@ -8,13 +8,14 @@ static unsigned char input[MAX_INPUT];
#define LZ4_BUFFER 0x2000 #define LZ4_BUFFER 0x2000
#define ORGOFFSET 0x6000 #define ORGOFFSET 0x6000
#define src 0x0
#define dst 0x2
#define end 0x4 #define end 0x4
#define count 0x6
#define delta 0x8 #define LZ4_SRC 0x00
#define LZ4_DST 0x02
#define LZ4_END 0x04
#define COUNT 0x06
#define DELTA 0x08
#define CH 0x24 #define CH 0x24
#define CV 0x25 #define CV 0x25
@ -22,249 +23,221 @@ static unsigned char input[MAX_INPUT];
#define OUTL 0xFE #define OUTL 0xFE
#define OUTH 0xFF #define OUTH 0xFF
static int cycles;
static void getsrc(void) { static void getsrc(void) {
//getsrc: //getsrc:
a=ram[y_indirect(src,y)]; // lda (src), y a=ram[y_indirect(LZ4_SRC,y)]; cycles+=5; // lda (LZ4_SRC), y
ram[LZ4_SRC]++; cycles+=5; //inc LZ4_SRC
printf("LOAD %02X%02X: %02X\n",ram[src+1],ram[src],a); cycles+=2;
if (ram[LZ4_SRC]!=0) {
ram[src]++; //inc src cycles+=1;
if (ram[src]!=0) goto done_getsrc; //bne + goto done_getsrc; //bne +
ram[src+1]++; //inc src+1 }
done_getsrc: ; ram[LZ4_SRC+1]++; cycles+=5; //inc LZ4_SRC+1
done_getsrc:
cycles+=6;
//+ rts //+ rts
} }
void buildcount(void) { void buildcount(void) {
printf("\tBUILDCOUNT: A=0x%x\n",a);
//buildcount: //buildcount:
x=1; // ?? // ldx #1 x=1; cycles+=2; // ?? // ldx #1
ram[count+1]=x; // ?? // stx count+1 ram[COUNT+1]=x; cycles+=3; // ?? // stx COUNT+1
cmp(0xf); // if 15, more complicated // cmp #$0f cmp(0xf); cycles+=2; // if 15, more complicated // cmp #$0f
if (z==0) goto done_buildcount; // otherwise A is count // bne ++ cycles+=2;
if (z==0) {
cycles+=1;
goto done_buildcount; // otherwise A is count // bne ++
}
buildcount_loop: buildcount_loop:
ram[count]=a; //- sta count ram[COUNT]=a; cycles+=3; //- sta count
// printf("MBC "); getsrc(); cycles+=6; //jsr getsrc
getsrc(); //jsr getsrc x=a; cycles+=2; //tax
printf("\tADDITIONAL BUILDCOUNT 0x%x, adding 0x%x\n",a,ram[count]); c=0; cycles+=2; //clc
x=a; //tax adc(ram[COUNT]);cycles+=3; //adc COUNT
c=0; //clc cycles+=2;
adc(ram[count]); //adc count if (c==0) {
printf("\tGOT 0x%x c=%d\n",a,c); cycles+=1;
if (c==0) goto skip_buildcount; // bcc + goto skip_buildcount; // bcc +
ram[count+1]++; //inc count+1 }
ram[COUNT+1]++; cycles+=5; //inc COUNT+1
skip_buildcount: skip_buildcount:
printf("\tUPDATED COUNT %02X%02X\n",ram[count+1],a); x++; cycles+=2; // check if x is 255 //+ inx
x++; // check if x is 255 //+ inx if (x==0) {
if (x==0) goto buildcount_loop; // if so, add in next byte //beq - cycles+=1;
done_buildcount: ; //++ rts goto buildcount_loop; // if so, add in next byte //beq -
printf("\tBUILDCOUNT= %02X%02X r[c+1]=%02X r[c]=%02X a=%02X x=%02X\n", }
ram[count+1],a,ram[count+1],ram[count],a,x); done_buildcount:
cycles+=6; //++ rts
} }
static void putdst(void) { static void putdst(void) {
// printf("PUTADDR=%04X\n",y_indirect(dst,y)); // putdst:
// putdst: ram[y_indirect(LZ4_DST,y)]=a; cycles+=6; // sta (LZ4_DST), y
ram[y_indirect(dst,y)]=a; // sta (dst), y ram[LZ4_DST]++; cycles+=5; // inc LZ4_DST
if (y!=0) printf("ERROR ERROR ERROR ERROR ERROR\n"); cycles+=2;
printf("\t\tPUT: %02X%02X = %02X\n",ram[dst+1],ram[dst],a); if (ram[LZ4_DST]!=0) {
ram[dst]++; // inc dst cycles+=1;
if (ram[dst]!=0) goto putdst_end; // bne + goto putdst_end; // bne +
ram[dst+1]++; // inc dst+1 }
ram[LZ4_DST+1]++; cycles+=5; // inc LZ4_DST+1
putdst_end:; putdst_end:;
//+ rts cycles+=6; //+ rts
} }
static void getput(void) { static void getput(void) {
// getput: // getput:
printf("GP ");
getsrc(); // jsr getsrc getsrc(); // jsr getsrc
cycles+=6;
putdst(); // ; fallthrough putdst(); // ; fallthrough
} }
// 202 -> 201 -> 100 -> 1FF
// 201 -> 100 -> 1FF
// 200 -> 2ff -> 2fe
// 1ff -> 1fe
static void docopy(void) { static void docopy(void) {
printf("\tDOCOPY ENTRY: %02X%02X\n",ram[count+1],x);
// docopy: // docopy:
docopy_label: docopy_label:
printf("\tDOCOPY %02X%02X: ",ram[count+1],x); getput(); cycles+=6; // jsr getput
getput(); // jsr getput x--; cycles+=2; // dex
x--; // dex cycles+=2;
if (x!=0) goto docopy_label; // bne docopy if (x!=0) {
ram[count+1]--; // dec count+1 cycles+=1;
if (ram[count+1]!=0) goto docopy_label; //bne docopy goto docopy_label; // bne docopy
}
ram[COUNT+1]--; cycles+=5; // dec COUNT+1
cycles+=2;
if (ram[COUNT+1]!=0) {
cycles++;
goto docopy_label; //bne docopy
}
cycles+=6;
//rts //rts
} }
#define orgoff 0x6000
int lz4_decode(void) { int lz4_decode(void) {
FILE *fff; FILE *fff;
int size; cycles=0;
short orgoff,paksize,pakoff;
//LZ4 data decompressor for Apple II //LZ4 data decompressor for Apple II
//Peter Ferrie (peter.ferrie@gmail.com) //Peter Ferrie (peter.ferrie@gmail.com)
// lz4_decode:
//init = 0 ;set to 1 if you know the values a=ram[LZ4_SRC]; cycles+=3; // lda LZ4_SRC
//hiunp = 0 ;unpacker entirely in high memory c=0; cycles+=2; // clc
//hipak = 0 ;packed data entirely in high memory (requires hiunp) adc(ram[LZ4_END]); cycles+=3; // adc LZ4_END
ram[LZ4_END]=a; cycles+=3; // sta LZ4_END
a=ram[LZ4_SRC+1]; cycles+=3; // lda LZ4_SRC+1
adc(ram[LZ4_END+1]); cycles+=3; // adc LZ4_END+1
ram[LZ4_END+1]=a; cycles+=3; // sta LZ4_END+1
//oep = 0; //first unpacked byte to run, you must set this by yourself a=high(orgoff); cycles+=2; // lda #>orgoff ; original unpacked data offset
orgoff = ORGOFFSET; //offset of first unpacked byte, you must set this by yourself ram[LZ4_DST+1]=a; cycles+=3; // sta LZ4_DST+1
paksize = size-0xb-8; a=low(orgoff); cycles+=2; // lda #<orgoff
// minus 4 for checksum at end ram[LZ4_DST]=a; cycles+=3; // sta LZ4_DST
// not sure what other 4 is from?
// block checksum? though had that disabled?
//size of packed data, you must set this by yourself if hiunp=0 // printf("packed size: raw=%x, adj=%x\n",size,paksize);
pakoff = LZ4_BUFFER+11; // 11 byte offset to data? printf("packed addr: %02X%02X\n",ram[LZ4_SRC+1],ram[LZ4_SRC]);
//LCBANK2 = $c083
//MOVE = $fe2c
a=(pakoff&0xff); //lda #<pakoff ;packed data offset
ram[src]=a; //sta src
a=(pakoff+paksize)&0xff;//lda #<(pakoff+paksize) ;packed data size
ram[end]=a; // sta end
a=(pakoff>>8); //lda #>pakoff
ram[src+1]=a; //sta src+1
a=(pakoff+paksize)>>8; //lda #>(pakoff+paksize)
ram[end+1]=a; // sta end+1
a=(orgoff>>8); //lda #>orgoff ;original unpacked data offset
ram[dst+1]=a; //sta dst+1
a=(orgoff&0xff); //lda #<orgoff
ram[dst]=a; // sta dst
printf("packed size: raw=%x, adj=%x\n",size,paksize);
printf("packed addr: %02X%02X\n",ram[src+1],ram[src]);
printf("packed end : %02X%02X\n",ram[end+1],ram[end]); printf("packed end : %02X%02X\n",ram[end+1],ram[end]);
printf("dest addr : %02X%02X\n",ram[dst+1],ram[dst]); printf("dest addr : %02X%02X\n",ram[LZ4_DST+1],ram[LZ4_DST]);
// https://github.com/lz4/lz4/wiki/lz4_Frame_format.md //unpmain:
y=0; cycles+=2; // used for offset //ldy #0
// Should: check for magic number 04 22 4d 18
// FLG: 64 in our case (01=version, block.index=1, block.checksum=0
// size=0, checksum=1, reserved
// MAX Blocksize: 40 (64kB)
// HEADER CHECKSUM: a7
// BLOCK HEADER: 4 bytes (le) If highest bit set, uncompressed!
//unpack: //;unpacker entrypoint
// goto unpmain; // jmp unpmain
//unpack:
y=0; // used for offset //ldy #0
parsetoken: parsetoken:
printf("LOAD TOKEN: "); getsrc(); cycles+=6; // jsr getsrc
getsrc(); // jsr getsrc
// get token // get token
pha(); // save for later // pha pha(); cycles+=3; // save for later // pha
lsr(); // num literals in top 4 // lsr lsr(); cycles+=2; // num literals in top 4// lsr
lsr(); // lsr lsr(); cycles+=2; // lsr
lsr(); // lsr lsr(); cycles+=2; // lsr
lsr(); // lsr lsr(); cycles+=2; // lsr
if (a==0) goto copymatches; // if zero, no literals // beq copymatches cycles+=2;
if (a==0) {
cycles+=1;
goto copymatches; // if zero, no literals // beq copymatches
}
buildcount(); // otherwise, build the count // jsr buildcount buildcount(); cycles+=6; // otherwise, build the count // jsr buildcount
x=a; // tax x=a; cycles+=2; // tax
docopy(); // jsr docopy docopy(); cycles+=6; // jsr docopy
a=ram[src]; // lda src a=ram[LZ4_SRC]; cycles+=3; // lda LZ4_SRC
cmp(ram[end]); // cmp end cmp(ram[end]); cycles+=3; // cmp end
a=ram[src+1]; // lda src+1 a=ram[LZ4_SRC+1]; cycles+=3; // lda LZ4_SRC+1
sbc(ram[end+1]); // sbc end+1 sbc(ram[end+1]); cycles+=3; // sbc end+1
cycles+=2;
if (c) { if (c) {
printf("Done!\n"); printf("Done!\n");
printf("src : %02X%02X\n",ram[src+1],ram[src]); printf("src : %02X%02X\n",ram[LZ4_SRC+1],ram[LZ4_SRC]);
printf("packed end : %02X%02X\n",ram[end+1],ram[end]); printf("packed end : %02X%02X\n",ram[end+1],ram[end]);
cycles+=1;
goto done; // bcs done goto done; // bcs done
} }
copymatches: copymatches:
printf("\tDELTAL "); getsrc(); cycles+=6; // jsr getsrc
getsrc(); // jsr getsrc ram[DELTA]=a; cycles+=3; // sta DELTA
ram[delta]=a; // sta delta getsrc(); cycles+=6; // jsr getsrc
printf("\tDELTAH "); ram[DELTA+1]=a; cycles+=3; // sta DELTA+1
getsrc(); // jsr getsrc pla(); cycles+=3; // restore token // pla
ram[delta+1]=a; // sta delta+1 a=a&0xf; cycles+=2; // get bottom 4 bits // and #$0f
printf("\tDELTA is %02X%02X\n",ram[delta+1],ram[delta]); buildcount(); cycles+=6; // jsr buildcount
pla(); // restore token // pla
a=a&0xf; // get bottom 4 bits // and #$0f
buildcount(); // jsr buildcount
c=0; // clc c=0; cycles+=2; // clc
adc(4); // adc #4 adc(4); cycles+=2; // adc #4
x=a; // tax x=a; cycles+=2; // tax
if (x==0) goto copy_skip; //BUGFIX // beq + cycles+=2;
if (c==0) goto copy_skip; // bcc + if (x==0) {
ram[count+1]++; // inc count+1 cycles+=1;
goto copy_skip; //BUGFIX // beq +
}
cycles+=2;
if (c==0) {
cycles+=1;
goto copy_skip; // bcc +
}
ram[COUNT+1]++; cycles+=5; // inc count+1
copy_skip: copy_skip:
a=ram[src+1]; //+ lda src+1 a=ram[LZ4_SRC+1]; cycles+=3; //+ lda src+1
pha(); // pha pha(); cycles+=3; // pha
a=ram[src]; // lda src a=ram[LZ4_SRC]; cycles+=3; // lda src
pha(); // pha pha(); cycles+=3; // pha
printf("\tSAVED SRC: %02X%02X\n",ram[src+1],ram[src]); c=1; cycles+=2; // sec
// printf("CALCULATING: DST %02X%02X - DELTA %02X%02X\n",ram[dst+1],ram[dst],ram[delta+1],ram[delta]); a=ram[LZ4_DST]; cycles+=3; // lda LZ4_DST
c=1; // sec sbc(ram[DELTA]); cycles+=3; // sbc DELTA
a=ram[dst]; // lda dst ram[LZ4_SRC]=a; cycles+=3; // sta LZ4_SRC
sbc(ram[delta]); // sbc delta a=ram[LZ4_DST+1]; cycles+=3; // lda LZ4_DST+1
ram[src]=a; // sta src sbc(ram[DELTA+1]); cycles+=3; // sbc DELTA+1
a=ram[dst+1]; // lda dst+1 ram[LZ4_SRC+1]=a; cycles+=3; // sta LZ4_SRC+1
sbc(ram[delta+1]); // sbc delta+1
ram[src+1]=a; // sta src+1 docopy(); cycles+=6; // jsr docopy
printf("\tNEW SRC: %02X%02X\n",ram[src+1],ram[src]); pla(); cycles+=3; // pla
docopy(); // jsr docopy ram[LZ4_SRC]=a; cycles+=3; // sta LZ4_SRC
pla(); // pla pla(); cycles+=3; // pla
ram[src]=a; // sta src ram[LZ4_SRC+1]=a; cycles+=3; // sta LZ4_SRC+1
pla(); // pla cycles+=3;
ram[src+1]=a; // sta src+1
printf("\tRESTORED SRC: %02X%02X\n",ram[src+1],ram[src]);
goto parsetoken; // jmp parsetoken goto parsetoken; // jmp parsetoken
done: done:
cycles+=3;
pla(); // pla pla(); // pla
cycles+=6; // rts
int out_size=(ram[LZ4_DST+1]<<8)+ram[LZ4_DST];
int out_size=(ram[dst+1]<<8)+ram[dst];
out_size-=ORGOFFSET; out_size-=ORGOFFSET;
printf("dest addr : %02X%02X\n",ram[dst+1],ram[dst]); printf("dest addr : %02X%02X\n",ram[LZ4_DST+1],ram[LZ4_DST]);
int i,j,addr,temp;
addr=ORGOFFSET;
printf("\n");
for(i=0;i<256;i++) {
if (i%16==0) printf("%04X: ",addr+i);
printf("%02X ",ram[addr+i]);
if (i%16==15) {
for(j=0;j<16;j++) {
temp=ram[((addr+i)&0xfff0)+j];
if ((temp<' ') || (temp>127)) printf(".");
else printf("%c",temp);
}
printf("\n");
}
}
printf("\n");
// rts
fff=fopen("out.out","w"); fff=fopen("out.out","w");
if (fff==NULL) { if (fff==NULL) {
@ -278,6 +251,9 @@ done:
fclose(fff); fclose(fff);
printf("Cycles: %d\t %lfs\n",cycles,(double)cycles/1023000.0);
printf("\t50Hz\t %lfs\n",(double)1/50.0);
return 0; return 0;
} }
@ -362,6 +338,40 @@ int main(int argc, char **argv) {
a=23; a=23;
print_header_info(); print_header_info();
y=0;
a=high(LZ4_BUFFER+3);
ram[LZ4_SRC+1]=a;
a=low(LZ4_BUFFER+3);
ram[LZ4_SRC]=a;
a=ram[y_indirect(LZ4_SRC,y)];
c=0;
adc(ram[LZ4_SRC]);
ram[LZ4_SRC]=a;
a=ram[LZ4_SRC+1];
adc(0);
ram[LZ4_SRC+1]=a;
// next_subsong:
y=0;
a=ram[y_indirect(LZ4_SRC,y)];
ram[LZ4_END]=a;
y++;
a=ram[y_indirect(LZ4_SRC,y)];
ram[LZ4_END+1]=a;
y++;
a=2;
c=0;
adc(ram[LZ4_SRC]);
ram[LZ4_SRC]=a;
a=(ram[LZ4_SRC+1]);
adc(0);
ram[LZ4_SRC+1]=a;
lz4_decode();
return 0; return 0;
} }