2017-04-24 02:53:20 +00:00
|
|
|
/* tokenize_asoft: Tokenize an Applesoft BASIC program */
|
|
|
|
/* by Vince Weaver <vince@deater.net> */
|
|
|
|
|
2007-08-21 02:26:00 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h> /* strlen() */
|
|
|
|
#include <stdlib.h> /* exit() */
|
2017-04-24 02:53:20 +00:00
|
|
|
#include <unistd.h> /* getopt() */
|
2007-08-21 02:26:00 +00:00
|
|
|
|
2009-08-20 03:11:00 +00:00
|
|
|
#include "version.h"
|
|
|
|
|
2017-04-24 03:24:30 +00:00
|
|
|
static int debug=0;
|
|
|
|
|
2007-08-21 02:26:00 +00:00
|
|
|
/* TODO */
|
2017-01-04 19:07:39 +00:00
|
|
|
/* match lowercase tokens as well as upper case ones */
|
2007-08-21 02:26:00 +00:00
|
|
|
|
|
|
|
/* Info from http://docs.info.apple.com/article.html?coll=ap&artnum=57 */
|
|
|
|
|
2017-04-24 02:53:20 +00:00
|
|
|
/* In memory, applesoft file starts at address $801 */
|
|
|
|
/* format is <LINE><LINE><LINE>$00$00 */
|
|
|
|
/* Where <LINE> is: */
|
|
|
|
/* 2 bytes (little endian) of LINK giving the address of the next line */
|
|
|
|
/* 2 bytes (little endian) giving the line number */
|
|
|
|
/* a series of bytes either ASCII or tokens (see below) */
|
|
|
|
/* a $0 char indicating end of line */
|
2007-08-21 02:26:00 +00:00
|
|
|
|
2012-05-02 18:21:58 +00:00
|
|
|
/* Number of real keywords in the table: token values 0x80 .. 0xEA */
#define NUM_TOKENS 107

/*
 * Applesoft keyword table.
 *
 * Entry i is the text of token value 0x80+i.  Each entry is a fixed
 * 8-byte slot (longest keyword is 7 characters plus the terminator).
 * The table is padded out to 128 slots; only the first NUM_TOKENS
 * slots are searched by the tokenizer.
 *
 * Some entries contain spaces ("PR #", "IN #", "VTAB ", "DEF FN");
 * the tokenizer compares the full entry text, spaces included.
 */
char applesoft_tokens[][8]={
	/* 0x80 */ "END",    "FOR",    "NEXT",   "DATA",
	/* 0x84 */ "INPUT",  "DEL",    "DIM",    "READ",
	/* 0x88 */ "GR",     "TEXT",   "PR #",   "IN #",
	/* 0x8C */ "CALL",   "PLOT",   "HLIN",   "VLIN",
	/* 0x90 */ "HGR2",   "HGR",    "HCOLOR=","HPLOT",
	/* 0x94 */ "DRAW",   "XDRAW",  "HTAB",   "HOME",
	/* 0x98 */ "ROT=",   "SCALE=", "SHLOAD", "TRACE",
	/* 0x9C */ "NOTRACE","NORMAL", "INVERSE","FLASH",
	/* 0xA0 */ "COLOR=", "POP",    "VTAB ",  "HIMEM:",
	/* 0xA4 */ "LOMEM:", "ONERR",  "RESUME", "RECALL",
	/* 0xA8 */ "STORE",  "SPEED=", "LET",    "GOTO",
	/* 0xAC */ "RUN",    "IF",     "RESTORE","&",
	/* 0xB0 */ "GOSUB",  "RETURN", "REM",    "STOP",
	/* 0xB4 */ "ON",     "WAIT",   "LOAD",   "SAVE",
	/* 0xB8 */ "DEF FN", "POKE",   "PRINT",  "CONT",
	/* 0xBC */ "LIST",   "CLEAR",  "GET",    "NEW",
	/* 0xC0 */ "TAB(",   "TO",     "FN",     "SPC(",
	/* 0xC4 */ "THEN",   "AT",     "NOT",    "STEP",
	/* 0xC8 */ "+",      "-",      "*",      "/",
	/* 0xCC */ "^",      "AND",    "OR",     ">",
	/* 0xD0 */ "=",      "<",      "SGN",    "INT",
	/* 0xD4 */ "ABS",    "USR",    "FRE",    "SCRN(",
	/* 0xD8 */ "PDL",    "POS",    "SQR",    "RND",
	/* 0xDC */ "LOG",    "EXP",    "COS",    "SIN",
	/* 0xE0 */ "TAN",    "ATN",    "PEEK",   "LEN",
	/* 0xE4 */ "STR$",   "VAL",    "ASC",    "CHR$",
	/* 0xE8 */ "LEFT$",  "RIGHT$", "MID$",   "",
	/* 0xEC */ "",       "",       "",       "",
	/* 0xF0 */ "",       "",       "",       "",
	/* 0xF4 */ "",       "",       "",       "",
	/* 0xF8 */ "",       "",       "",       "",
	/* 0xFC */ "",       "(",      "(",      "("
};
|
|
|
|
|
2016-05-31 16:14:36 +00:00
|
|
|
/* Split a value into the two bytes of a 16-bit little-endian field */
#define LOW(_x)		((_x)&0xff)
#define HIGH(_x)	(((_x)>>8)&0xff)

/* File cannot be longer than 64k */
#define MAXSIZE		65535

/* Tokenized program image, built in memory and written out at the end */
unsigned char output[MAXSIZE+1];

/* Text of the input line currently being processed */
char input_line[BUFSIZ];

/* Current parse position; advanced by the line-number and token parsers */
char *line_ptr;

/* Count of input lines read so far, used in error messages */
int line=0;
|
|
|
|
|
|
|
|
static void show_problem(char *line_ptr) {
|
|
|
|
|
2016-05-25 19:07:05 +00:00
|
|
|
int offset,i;
|
|
|
|
|
|
|
|
offset=(int)(line_ptr-input_line);
|
|
|
|
fprintf(stderr,"%s",input_line);
|
|
|
|
for(i=0;i<offset;i++) fputc(' ',stderr);
|
|
|
|
fprintf(stderr,"^\n");
|
|
|
|
}
|
2007-08-21 02:26:00 +00:00
|
|
|
|
2017-04-24 03:24:30 +00:00
|
|
|
static int get_line_num(int *linenum, int *custom_offset) {
|
2016-05-25 19:07:05 +00:00
|
|
|
|
|
|
|
int num=0;
|
2017-04-24 03:24:30 +00:00
|
|
|
int offset=0;
|
2016-05-25 19:07:05 +00:00
|
|
|
|
|
|
|
/* skip any whitespace */
|
|
|
|
while((*line_ptr<=' ') && (*line_ptr!=0)) line_ptr++;
|
|
|
|
|
2017-04-24 03:24:30 +00:00
|
|
|
/* Custom Offset */
|
|
|
|
if (*line_ptr=='*') {
|
|
|
|
line_ptr++;
|
|
|
|
while(*line_ptr>' ') {
|
|
|
|
if ((*line_ptr>='0')&&(*line_ptr<='9')) {
|
|
|
|
offset*=16;
|
|
|
|
offset+=(*line_ptr)-'0';
|
|
|
|
} else if ((*line_ptr>='A')&&(*line_ptr<='F')) {
|
|
|
|
offset*=16;
|
|
|
|
offset+=(*line_ptr)-'A'+10;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
fprintf(stderr,"Invalid offset line %d\n",line);
|
|
|
|
show_problem(line_ptr);
|
|
|
|
exit(-1);
|
|
|
|
}
|
|
|
|
line_ptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Skip whitespace */
|
|
|
|
while((*line_ptr<=' ') && (*line_ptr!=0)) line_ptr++;
|
|
|
|
}
|
|
|
|
|
2016-05-25 19:07:05 +00:00
|
|
|
while (*line_ptr>' ') {
|
|
|
|
if ((*line_ptr<'0')||(*line_ptr>'9')) {
|
|
|
|
fprintf(stderr,"Invalid line number line %d\n",line);
|
|
|
|
show_problem(line_ptr);
|
|
|
|
exit(-1);
|
|
|
|
}
|
|
|
|
num*=10;
|
|
|
|
num+=(*line_ptr)-'0';
|
|
|
|
line_ptr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(*line_ptr)) {
|
|
|
|
fprintf(stderr,"Missing line number line %d\n",line);
|
|
|
|
exit(-1);
|
|
|
|
}
|
|
|
|
|
2017-04-24 03:24:30 +00:00
|
|
|
if (linenum) *linenum=num;
|
|
|
|
if (custom_offset) {
|
|
|
|
*custom_offset=offset;
|
|
|
|
if (debug) fprintf(stderr,"CO=%x\n",offset);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2016-05-25 19:07:05 +00:00
|
|
|
return num;
|
2007-08-21 02:26:00 +00:00
|
|
|
}
|
|
|
|
|
2012-05-02 18:21:58 +00:00
|
|
|
static int in_quotes=0,in_rem=0;
|
2007-08-21 02:26:00 +00:00
|
|
|
|
2016-05-25 19:20:48 +00:00
|
|
|
/* note: try to find longest possible token */
|
|
|
|
/* otherwise ATN is turned into AT N */
|
2016-05-25 19:07:05 +00:00
|
|
|
static int find_token(void) {
|
|
|
|
|
|
|
|
int ch,i;
|
|
|
|
|
|
|
|
ch=*line_ptr;
|
|
|
|
|
|
|
|
/* end remarks if end of line */
|
|
|
|
if (in_rem && (ch=='\n')) {
|
|
|
|
in_rem=0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* don't skip whitespace in quotes or remarks */
|
|
|
|
if ((!in_quotes)&&(!in_rem)) {
|
|
|
|
while(ch<=' ') {
|
|
|
|
if ((ch=='\n') || (ch=='\r') || (ch=='\0')) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
line_ptr++;
|
|
|
|
ch=*line_ptr;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* toggle quotes mode */
|
|
|
|
if (ch=='\"') in_quotes=!in_quotes;
|
|
|
|
|
|
|
|
/* don't tokenize if in quotes */
|
|
|
|
if ((!in_quotes)&&(!in_rem)) {
|
|
|
|
|
2016-05-25 19:20:48 +00:00
|
|
|
// fprintf(stderr,"%s",line_ptr);
|
2016-05-25 19:07:05 +00:00
|
|
|
for(i=0;i<NUM_TOKENS;i++) {
|
|
|
|
if (!strncmp(line_ptr,applesoft_tokens[i],
|
|
|
|
strlen(applesoft_tokens[i]))) {
|
2016-05-25 19:20:48 +00:00
|
|
|
|
|
|
|
/* HACK: special case to avoid AT/ATN problem */
|
2016-05-31 16:14:36 +00:00
|
|
|
/* Update, apparently actual applesoft uses */
|
|
|
|
/* a similar hack. Also the 'A TO' */
|
|
|
|
/* case which we don't handle because */
|
|
|
|
/* we like sane whitespace. */
|
2016-05-25 19:20:48 +00:00
|
|
|
if ((i==69) && (line_ptr[2]=='N')) continue;
|
|
|
|
// fprintf(stderr,
|
|
|
|
// "Found token %x (%s) %d\n",0x80+i,
|
|
|
|
// applesoft_tokens[i],i);
|
2016-05-25 19:07:05 +00:00
|
|
|
|
|
|
|
line_ptr+=strlen(applesoft_tokens[i]);
|
|
|
|
|
|
|
|
/* REM is 0x32 */
|
|
|
|
if (i==0x32) in_rem=1;
|
|
|
|
|
|
|
|
return 0x80+i;
|
|
|
|
}
|
|
|
|
|
|
|
|
//fprintf(stderr,"%s ",applesoft_tokens[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//fprintf(stderr,"\n");
|
|
|
|
|
|
|
|
/* not a token, just ascii */
|
|
|
|
line_ptr++;
|
|
|
|
return ch;
|
2007-08-21 02:26:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void check_oflo(int size) {
|
|
|
|
|
2016-05-25 19:07:05 +00:00
|
|
|
if (size>MAXSIZE) {
|
|
|
|
fprintf(stderr,"Output file too big!\n");
|
|
|
|
exit(-1);
|
|
|
|
}
|
|
|
|
}
|
2007-08-21 02:26:00 +00:00
|
|
|
|
|
|
|
int main(int argc, char **argv) {
|
2016-05-25 19:07:05 +00:00
|
|
|
|
|
|
|
int offset=2,i;
|
|
|
|
|
2017-04-24 03:24:30 +00:00
|
|
|
int linenum=0,custom_offset=0,lastline=0,link_offset;
|
2016-05-25 19:07:05 +00:00
|
|
|
int link_value=0x801; /* start of applesoft program */
|
|
|
|
int token;
|
2017-04-24 02:53:20 +00:00
|
|
|
int c;
|
|
|
|
FILE *fff;
|
|
|
|
|
|
|
|
/* Check command line arguments */
|
|
|
|
while ((c = getopt (argc, argv,"d"))!=-1) {
|
|
|
|
switch (c) {
|
|
|
|
|
|
|
|
case 'd':
|
|
|
|
debug=1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No file specified, used stdin */
|
|
|
|
if (optind==argc) {
|
|
|
|
fff=stdin;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
fff=fopen(argv[optind],"r");
|
|
|
|
if (fff==NULL) {
|
|
|
|
fprintf(stderr,"Error, could not open %s\n",argv[optind]);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (debug) fprintf(stderr,"Opened file %s\n",argv[optind]);
|
|
|
|
}
|
2016-05-25 19:07:05 +00:00
|
|
|
|
|
|
|
while(1) {
|
|
|
|
/* get line from input file */
|
2017-04-24 02:53:20 +00:00
|
|
|
line_ptr=fgets(input_line,BUFSIZ,fff);
|
2016-05-25 19:07:05 +00:00
|
|
|
line++;
|
|
|
|
if (line_ptr==NULL) break;
|
2016-05-25 19:11:12 +00:00
|
|
|
|
2017-01-04 19:07:39 +00:00
|
|
|
/* VMW extension, skip between 'if 0 and 'endif */
|
|
|
|
if (line_ptr[0]=='\'') {
|
|
|
|
if (!strncmp(line_ptr,"\'.if 0",6)) {
|
|
|
|
while(1) {
|
2017-04-24 02:53:20 +00:00
|
|
|
line_ptr=fgets(input_line,BUFSIZ,fff);
|
2017-01-04 19:07:39 +00:00
|
|
|
line++;
|
|
|
|
if (line_ptr==NULL) break;
|
|
|
|
if (!strncmp(line_ptr,"\'.endif",7)) break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (line_ptr==NULL) break;
|
|
|
|
|
2016-05-31 16:14:36 +00:00
|
|
|
/* VMW extension: use leading ' as a comment char */
|
|
|
|
if (line_ptr[0]=='\'') continue;
|
|
|
|
|
|
|
|
/* skip empty lines */
|
|
|
|
if (line_ptr[0]=='\n') continue;
|
|
|
|
|
2017-04-24 03:24:30 +00:00
|
|
|
get_line_num(&linenum,&custom_offset);
|
2016-05-25 19:11:12 +00:00
|
|
|
if ((linenum>65535) || (linenum<0)) {
|
|
|
|
fprintf(stderr,"Invalid line number %d\n",linenum);
|
|
|
|
exit(-1);
|
|
|
|
}
|
|
|
|
if (linenum<lastline) {
|
|
|
|
fprintf(stderr,"Line counted backwards %d->%d\n",
|
|
|
|
lastline,linenum);
|
2016-05-25 19:07:05 +00:00
|
|
|
exit(-1);
|
|
|
|
}
|
2016-05-25 19:11:12 +00:00
|
|
|
lastline=linenum;
|
|
|
|
|
2016-05-25 19:07:05 +00:00
|
|
|
link_offset=offset;
|
|
|
|
check_oflo(offset+4);
|
|
|
|
output[offset+2]=LOW(linenum);
|
|
|
|
output[offset+3]=HIGH(linenum);
|
|
|
|
offset+=4;
|
|
|
|
|
|
|
|
while(1) {
|
|
|
|
token=find_token();
|
|
|
|
output[offset]=token;
|
2017-04-24 03:24:30 +00:00
|
|
|
if (debug) fprintf(stderr,"%2X ",token);
|
2016-05-25 19:07:05 +00:00
|
|
|
offset++;
|
|
|
|
check_oflo(offset);
|
|
|
|
if (!token) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* remarks end at end of line */
|
|
|
|
in_rem=0;
|
|
|
|
|
|
|
|
/* 2 bytes is to ignore size from beginning of file */
|
|
|
|
link_value=0x801+(offset-2);
|
|
|
|
|
|
|
|
/* point link value to next line */
|
|
|
|
check_oflo(offset+2);
|
2017-04-24 03:24:30 +00:00
|
|
|
if (custom_offset) {
|
|
|
|
output[link_offset]=LOW(custom_offset);
|
|
|
|
output[link_offset+1]=HIGH(custom_offset);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
output[link_offset]=LOW(link_value);
|
|
|
|
output[link_offset+1]=HIGH(link_value);
|
|
|
|
}
|
2016-05-25 19:07:05 +00:00
|
|
|
}
|
|
|
|
/* set last link field to $00 $00 which indicates EOF */
|
|
|
|
check_oflo(offset+2);
|
|
|
|
output[offset]='\0';
|
|
|
|
output[offset+1]='\0';
|
|
|
|
offset+=2;
|
|
|
|
|
|
|
|
/* Set filesize */
|
|
|
|
/* -1 to match observed values */
|
|
|
|
output[0]=LOW(offset-1);
|
|
|
|
output[1]=HIGH(offset-1);
|
|
|
|
/* output our file */
|
|
|
|
for(i=0;i<offset;i++) putchar(output[i]);
|
|
|
|
|
|
|
|
return 0;
|
2007-08-21 02:26:00 +00:00
|
|
|
}
|