dos33fsprogs/utils/asoft_basic-utils/tokenize_asoft.c
Vince Weaver 53783d0cf0 tokenize_asoft: fix issue with spaces in tokens
the Applesoft tokenizer essentially allows spaces anywhere in tokens
   HCOLOR=3
   HCOLOR = 3
   H C O L O R = 3
   HCOLOR               = 3
are all the same

this fixes an issue when parsing on some code I had, I hope it doesn't
break other things.  Need to add some test cases
2023-12-06 23:29:19 -05:00

377 lines
8.4 KiB
C

/* tokenize_asoft: Tokenize an Applesoft BASIC program */
/* by Vince Weaver <vince@deater.net> */
#include <stdio.h>
#include <string.h> /* strlen() */
#include <stdlib.h> /* exit() */
#include <unistd.h> /* getopt() */
#include "version.h"
static int debug=0;
/* TODO */
/* match lowercase tokens as well as upper case ones */
/* Info from http://docs.info.apple.com/article.html?coll=ap&artnum=57 */
/* In memory, applesoft file starts at address $801 */
/* format is <LINE><LINE><LINE>$00$00 */
/* Where <LINE> is: */
/* 2 bytes (little endian) of LINK indicating addy of next line */
/* 2 bytes (little endian) giving the line number */
/* a series of bytes either ASCII or tokens (see below) */
/* a $0 char indicating end of line */
/* $9B */
#define NUM_TOKENS 107
/* Starting at 0x80 */
char applesoft_tokens[][8]={
/* note applesoft ignores all spaces when parsing */
/* so OK that we have PR# instead of PR # here */
/* 80 */ "END","FOR","NEXT","DATA","INPUT","DEL","DIM","READ",
/* 88 */ "GR","TEXT","PR#","IN#","CALL","PLOT","HLIN","VLIN",
/* 90 */ "HGR2","HGR","HCOLOR=","HPLOT","DRAW","XDRAW","HTAB","HOME",
/* 98 */ "ROT=","SCALE=","SHLOAD","TRACE","NOTRACE","NORMAL","INVERSE","FLASH",
/* A0 */ "COLOR=","POP","VTAB","HIMEM:","LOMEM:","ONERR","RESUME","RECALL",
/* A8 */ "STORE","SPEED=","LET","GOTO","RUN","IF","RESTORE","&",
/* B0 */ "GOSUB","RETURN","REM","STOP","ON","WAIT","LOAD","SAVE",
/* B8 */ "DEF","POKE","PRINT","CONT","LIST","CLEAR","GET","NEW",
/* C0 */ "TAB(","TO","FN","SPC(","THEN","AT","NOT","STEP",
/* C8 */ "+","-","*","/","^","AND","OR",">",
/* D0 */ "=","<","SGN","INT","ABS","USR","FRE","SCRN(",
/* D8 */ "PDL","POS","SQR","RND","LOG","EXP","COS","SIN",
/* E0 */ "TAN","ATN","PEEK","LEN","STR$","VAL","ASC","CHR$",
/* E8 */ "LEFT$","RIGHT$","MID$","","","","","",
/* F0 */ "","","","","","","","",
/* F8 */ "","","","","","(","(","("
};
#define LOW(_x) ((_x)&0xff)
#define HIGH(_x) (((_x)>>8)&0xff)
#define MAXSIZE 65535
/* File cannot be longer than 64k */
unsigned char output[MAXSIZE+1];
char *line_ptr;
int line=0;
char input_line[BUFSIZ];
static void show_problem(char *line_ptr) {
int offset,i;
offset=(int)(line_ptr-input_line);
fprintf(stderr,"%s",input_line);
for(i=0;i<offset;i++) fputc(' ',stderr);
fprintf(stderr,"^\n");
}
static int get_line_num(int *linenum, int *custom_offset) {
int num=0;
int offset=0;
/* skip any whitespace */
while((*line_ptr<=' ') && (*line_ptr!=0)) line_ptr++;
/* Custom Offset */
if (*line_ptr=='*') {
line_ptr++;
while(*line_ptr>' ') {
if ((*line_ptr>='0')&&(*line_ptr<='9')) {
offset*=16;
offset+=(*line_ptr)-'0';
} else if ((*line_ptr>='A')&&(*line_ptr<='F')) {
offset*=16;
offset+=(*line_ptr)-'A'+10;
}
else {
fprintf(stderr,"Invalid offset line %d\n",line);
show_problem(line_ptr);
exit(-1);
}
line_ptr++;
}
/* Skip whitespace */
while((*line_ptr<=' ') && (*line_ptr!=0)) line_ptr++;
}
while (*line_ptr>' ') {
if ((*line_ptr<'0')||(*line_ptr>'9')) {
// fprintf(stderr,"Invalid line number line %d\n",line);
// show_problem(line_ptr);
// exit(-1);
/* not a bug */
break;
}
num*=10;
num+=(*line_ptr)-'0';
line_ptr++;
}
if (!(*line_ptr)) {
fprintf(stderr,"Missing line number line %d\n",line);
exit(-1);
}
if (linenum) *linenum=num;
if (custom_offset) {
*custom_offset=offset;
if (debug) fprintf(stderr,"CO=%x\n",offset);
}
return num;
}
/* Applesoft ignores spaces */
/* so HCOLOR=0 */
/* HCOLOR = 0 */
/* H C O L O R = 0 */
/* are all equivalent */
static int strncmp_ignore_spaces(char *line, char *token,
int size, char **token_end) {
unsigned char u1, u2;
while (size-- > 0) {
do {
u1 = (unsigned char) *line++;
*token_end=line;
} while (u1==' ');
u2 = (unsigned char) *token++;
if (u1 != u2) return u1 - u2;
if (u1 == '\0') return 0;
}
return 0;
}
static int in_quotes=0,in_rem=0;
/* note: try to find longest possible token */
/* otherwise ATN is turned into AT N */
static int find_token(void) {
int ch,i;
char *token_end=NULL;
ch=*line_ptr;
/* end remarks if end of line */
if (in_rem && (ch=='\n')) {
in_rem=0;
return 0;
}
/* end quote if end of line */
if (in_quotes && (ch=='\n')) {
in_quotes=0;
return 0;
}
/* don't skip whitespace in quotes or remarks */
if ((!in_quotes)&&(!in_rem)) {
while(ch<=' ') {
if ((ch=='\n') || (ch=='\r') || (ch=='\0')) {
return 0;
}
line_ptr++;
ch=*line_ptr;
}
}
/* toggle quotes mode */
if (ch=='\"') in_quotes=!in_quotes;
/* don't tokenize if in quotes */
if ((!in_quotes)&&(!in_rem)) {
/* hack: handle ? as a BA PRINT token */
if (line_ptr[0]=='?') {
line_ptr++;
return 0xBA;
}
// note, on IIe Applesoft uppercases lowercase
// this also extends to the uppper ascii? except for $60 (`)?
// we need to somehow take this into account when comparing
// if (ch>0x60) {
// ch=ch-0x20;
// }
// fprintf(stderr,"%s",line_ptr);
for(i=0;i<NUM_TOKENS;i++) {
if (!strncmp_ignore_spaces(line_ptr,applesoft_tokens[i],
strlen(applesoft_tokens[i]),
&token_end)) {
/* HACK: special case to avoid AT/ATN problem */
/* Update, apparently actual applesoft uses */
/* a similar hack. Also the 'A TO' */
/* case which we don't handle because */
/* we like sane whitespace. */
if ((i==69) && (line_ptr[2]=='N')) continue;
// fprintf(stderr,
// "Found token %x (%s) %d\n",0x80+i,
// applesoft_tokens[i],i);
//line_ptr+=strlen(applesoft_tokens[i]);
line_ptr=token_end;
/* REM is 0x32 (0xB2) */
if (i==0x32) in_rem=1;
return 0x80+i;
}
//fprintf(stderr,"%s ",applesoft_tokens[i]);
}
}
//fprintf(stderr,"\n");
/* not a token, just ascii */
line_ptr++;
return ch;
}
static void check_oflo(int size) {
if (size>MAXSIZE) {
fprintf(stderr,"Output file too big!\n");
exit(-1);
}
}
int main(int argc, char **argv) {
int offset=2,i;
int linenum=0,custom_offset=0,lastline=0,link_offset;
int link_value=0x801; /* start of applesoft program */
int token;
int c;
FILE *fff;
/* Check command line arguments */
while ((c = getopt (argc, argv,"d"))!=-1) {
switch (c) {
case 'd':
debug=1;
break;
}
}
/* No file specified, used stdin */
if (optind==argc) {
fff=stdin;
}
else {
fff=fopen(argv[optind],"r");
if (fff==NULL) {
fprintf(stderr,"Error, could not open %s\n",argv[optind]);
return -1;
}
if (debug) fprintf(stderr,"Opened file %s\n",argv[optind]);
}
while(1) {
/* get line from input file */
line_ptr=fgets(input_line,BUFSIZ,fff);
line++;
if (line_ptr==NULL) break;
/* VMW extension, skip between 'if 0 and 'endif */
if (line_ptr[0]=='\'') {
if (!strncmp(line_ptr,"\'.if 0",6)) {
while(1) {
line_ptr=fgets(input_line,BUFSIZ,fff);
line++;
if (line_ptr==NULL) break;
if (!strncmp(line_ptr,"\'.endif",7)) break;
}
}
}
if (line_ptr==NULL) break;
/* VMW extension: use leading ' as a comment char */
if (line_ptr[0]=='\'') continue;
/* skip empty lines */
if (line_ptr[0]=='\n') continue;
get_line_num(&linenum,&custom_offset);
if ((linenum>65535) || (linenum<0)) {
fprintf(stderr,"Invalid line number %d\n",linenum);
exit(-1);
}
if (linenum<lastline) {
fprintf(stderr,"Line counted backwards %d->%d\n",
lastline,linenum);
exit(-1);
}
lastline=linenum;
link_offset=offset;
check_oflo(offset+4);
output[offset+2]=LOW(linenum);
output[offset+3]=HIGH(linenum);
offset+=4;
while(1) {
token=find_token();
output[offset]=token;
if (debug) fprintf(stderr,"%2X ",token);
offset++;
check_oflo(offset);
if (!token) break;
}
/* remarks end at end of line */
in_rem=0;
/* quotes do too */
in_quotes=0;
/* 2 bytes is to ignore size from beginning of file */
link_value=0x801+(offset-2);
/* point link value to next line */
check_oflo(offset+2);
if (custom_offset) {
output[link_offset]=LOW(custom_offset);
output[link_offset+1]=HIGH(custom_offset);
}
else {
output[link_offset]=LOW(link_value);
output[link_offset+1]=HIGH(link_value);
}
}
/* set last link field to $00 $00 which indicates EOF */
check_oflo(offset+2);
output[offset]='\0';
output[offset+1]='\0';
offset+=2;
/* Set filesize */
/* -1 to match observed values */
output[0]=LOW(offset-1);
output[1]=HIGH(offset-1);
/* output our file */
for(i=0;i<offset;i++) putchar(output[i]);
return 0;
}