commit 3dd328cb1a910dcb48233c9a418496ebf80fac37 Author: Rob Greene Date: Sun May 6 14:31:36 2018 -0500 Initial commit. diff --git a/bastokenizer/.gitignore b/bastokenizer/.gitignore new file mode 100644 index 0000000..8bd3a05 --- /dev/null +++ b/bastokenizer/.gitignore @@ -0,0 +1,4 @@ +/target/ +/.settings/ +/.classpath +/.project diff --git a/bastokenizer/IDEAS.txt b/bastokenizer/IDEAS.txt new file mode 100644 index 0000000..b4214ef --- /dev/null +++ b/bastokenizer/IDEAS.txt @@ -0,0 +1,38 @@ +Ideas +===== + +* Get simple solution working. +* Allow removal of REM statements. +* Allow code comments (' for basic?) that don't make it into resulting code. +* Compress lines to some maximum byte size. Needs to be aware of line numbers and references. +* Remove line numbers and use labels. +* Move constants to variables. +* Optimize expressions + + +Metadata (v1) +======== + +Program +- Lines + - Line number + - Statements + - Statement + - Tokens + - Strings (including numbers and variables!) + + +Metadata (v2) +======== + +Program +- Routines + - Label (default = "__main__" or something) + - Statements + - Distinct statements with expression trees + - LET + - FOR + - CALL + - POKE + - PRINT + - etc diff --git a/bastokenizer/pom.xml b/bastokenizer/pom.xml new file mode 100644 index 0000000..7b14a4b --- /dev/null +++ b/bastokenizer/pom.xml @@ -0,0 +1,16 @@ + + + 4.0.0 + net.sf.applecommander + bastokenizer + 0.0.1-SNAPSHOT + AppleSoft BASIC Tokenizer + Experiments with generating an AppleSoft B/BAS tokenized "binary". 
/**
 * The AppleSoft BASIC keywords and operators together with their token codes.
 *
 * <p>Each constant carries the one-byte token value ({@link #code}), the keyword
 * text as it appears in a listing ({@link #text}) and the pieces that text is
 * broken into by the shared {@link #tokenizer(Reader)} configuration
 * ({@link #parts}). For example {@code "MID$"} is seen by the tokenizer as the
 * word {@code "MID"} followed by the character {@code "$"}.
 */
public enum ApplesoftKeyword {
    END(0x80, "END"),
    FOR(0x81, "FOR"),
    NEXT(0x82, "NEXT"),
    DATA(0x83, "DATA"),
    INPUT(0x84, "INPUT"),
    DEL(0x85, "DEL"),
    DIM(0x86, "DIM"),
    READ(0x87, "READ"),
    GR(0x88, "GR"),
    TEXT(0x89, "TEXT"),
    PR(0x8A, "PR#"),
    IN(0x8B, "IN#"),
    CALL(0x8C, "CALL"),
    PLOT(0x8D, "PLOT"),
    HLIN(0x8E, "HLIN"),
    VLIN(0x8F, "VLIN"),
    HGR2(0x90, "HGR2"),
    HGR(0x91, "HGR"),
    HCOLOR(0x92, "HCOLOR="),
    HPLOT(0x93, "HPLOT"),
    DRAW(0x94, "DRAW"),
    XDRAW(0x95, "XDRAW"),
    HTAB(0x96, "HTAB"),
    HOME(0x97, "HOME"),
    ROT(0x98, "ROT="),
    SCALE(0x99, "SCALE="),
    SHLOAD(0x9A, "SHLOAD"),
    TRACE(0x9B, "TRACE"),
    NOTRACE(0x9C, "NOTRACE"),
    NORMAL(0x9D, "NORMAL"),
    INVERSE(0x9E, "INVERSE"),
    FLASH(0x9F, "FLASH"),
    COLOR(0xA0, "COLOR="),
    POP(0xA1, "POP"),
    VTAB(0xA2, "VTAB"),
    HIMEM(0xA3, "HIMEM:"),
    LOMEM(0xA4, "LOMEM:"),
    ONERR(0xA5, "ONERR"),
    RESUME(0xA6, "RESUME"),
    RECALL(0xA7, "RECALL"),
    STORE(0xA8, "STORE"),
    SPEED(0xA9, "SPEED="),
    LET(0xAA, "LET"),
    GOTO(0xAB, "GOTO"),
    RUN(0xAC, "RUN"),
    IF(0xAD, "IF"),
    RESTORE(0xAE, "RESTORE"),
    amp(0xAF, "&"),
    GOSUB(0xB0, "GOSUB"),
    RETURN(0xB1, "RETURN"),
    REM(0xB2, "REM"),
    STOP(0xB3, "STOP"),
    ON(0xB4, "ON"),
    WAIT(0xB5, "WAIT"),
    LOAD(0xB6, "LOAD"),
    SAVE(0xB7, "SAVE"),
    DEF(0xB8, "DEF"),
    POKE(0xB9, "POKE"),
    PRINT(0xBA, "PRINT"),
    CONT(0xBB, "CONT"),
    LIST(0xBC, "LIST"),
    CLEAR(0xBD, "CLEAR"),
    GET(0xBE, "GET"),
    NEW(0xBF, "NEW"),
    TAB(0xC0, "TAB("),
    TO(0xC1, "TO"),
    FN(0xC2, "FN"),
    SPC(0xC3, "SPC("),
    THEN(0xC4, "THEN"),
    AT(0xC5, "AT"),
    NOT(0xC6, "NOT"),
    STEP(0xC7, "STEP"),
    add(0xC8, "+"),
    sub(0xC9, "-"),
    mul(0xCA, "*"),
    div(0xCB, "/"),
    pow(0xCC, "^"),
    AND(0xCD, "AND"),
    OR(0xCE, "OR"),
    gt(0xCF, ">"),
    eq(0xD0, "="),
    lt(0xD1, "<"),
    SGN(0xD2, "SGN"),
    INT(0xD3, "INT"),
    ABS(0xD4, "ABS"),
    USR(0xD5, "USR"),
    FRE(0xD6, "FRE"),
    SCRN(0xD7, "SCRN("),
    PDL(0xD8, "PDL"),
    POS(0xD9, "POS"),
    SQR(0xDA, "SQR"),
    RND(0xDB, "RND"),
    LOG(0xDC, "LOG"),
    EXP(0xDD, "EXP"),
    COS(0xDE, "COS"),
    SIN(0xDF, "SIN"),
    TAN(0xE0, "TAN"),
    ATN(0xE1, "ATN"),
    PEEK(0xE2, "PEEK"),
    LEN(0xE3, "LEN"),
    STR(0xE4, "STR$"),
    VAL(0xE5, "VAL"),
    ASC(0xE6, "ASC"),
    CHR(0xE7, "CHR$"),
    LEFT(0xE8, "LEFT$"),
    RIGHT(0xE9, "RIGHT$"),
    MID(0xEA, "MID$");

    /**
     * The AppleSoft token value. Token is overloaded, so "code" is good enough.
     */
    public final int code;
    /**
     * Full text of the token.
     */
    public final String text;
    /**
     * Token parts as seen by the StreamTokenizer (unmodifiable).
     */
    public final List<String> parts;
    /**
     * Indicates that this needs _just_ a closing right parenthesis since the
     * opening left parenthesis is included in the token.
     */
    public final boolean needsRParen;

    private ApplesoftKeyword(int code, String text) {
        this.code = code;
        this.text = text;
        this.parts = Collections.unmodifiableList(split(text));
        this.needsRParen = parts.contains("(");
    }

    /**
     * Breaks keyword text into the pieces the shared tokenizer produces for it.
     * A bit brute-force, but it always matches the tokenizer configuration.
     */
    private static List<String> split(String text) {
        List<String> pieces = new ArrayList<>();
        try {
            StreamTokenizer t = tokenizer(new StringReader(text));
            while (t.nextToken() != StreamTokenizer.TT_EOF) {
                if (t.ttype == StreamTokenizer.TT_WORD) {
                    pieces.add(t.sval);
                } else {
                    // Ordinary (single character) token: ttype is the character itself.
                    pieces.add(String.format("%c", t.ttype));
                }
            }
        } catch (IOException ex) {
            // A StringReader should never fail; keep the cause if it somehow does.
            throw new IllegalStateException("Unable to split keyword text: " + text, ex);
        }
        return pieces;
    }

    /**
     * Compares the full keyword text against {@code value}, ignoring case.
     *
     * @param value text to compare; {@code null} never matches
     * @return {@code true} when {@code value} equals {@link #text} ignoring case
     */
    public boolean equalsIgnoreCase(String value) {
        return this.text.equalsIgnoreCase(value);
    }

    @Override
    public String toString() {
        return String.format("%s (%02x)", text, code);
    }

    /**
     * Utility method to create a shared definition for AppleSoft file parsing.
     *
     * @param r source of characters
     * @return a tokenizer where letters (and characters 160-255) form words,
     *         everything up to and including space is whitespace, {@code "} quotes
     *         strings, numbers are parsed, and end-of-line is reported
     */
    public static StreamTokenizer tokenizer(Reader r) {
        StreamTokenizer tokenizer = new StreamTokenizer(r);
        tokenizer.resetSyntax();
        tokenizer.wordChars('a', 'z');
        tokenizer.wordChars('A', 'Z');
        tokenizer.wordChars(128 + 32, 255);
        tokenizer.whitespaceChars(0, ' ');
        tokenizer.quoteChar('"');
        tokenizer.parseNumbers();
        // This resets part of parseNumbers to match AppleSoft tokenization!
        tokenizer.ordinaryChar('-');
        tokenizer.eolIsSignificant(true);
        return tokenizer;
    }

    /**
     * Utility method to locate a keyword ignoring case.
     *
     * <p>Only the first tokenizer part of each keyword is compared, so
     * {@code "MID"} locates {@link #MID} even though its full text is
     * {@code "MID$"}; callers are expected to verify the remaining parts.
     *
     * @param value the first piece of a potential keyword; must not be {@code null}
     * @return the first keyword (in declaration order) whose first part matches,
     *         or an empty Optional when there is none
     * @throws NullPointerException if {@code value} is {@code null}
     */
    public static Optional<ApplesoftKeyword> find(String value) {
        Objects.requireNonNull(value);
        for (ApplesoftKeyword kw : values()) {
            if (value.equalsIgnoreCase(kw.parts.get(0))) {
                return Optional.of(kw);
            }
        }
        return Optional.empty();
    }
}
+ } +} diff --git a/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Parser.java b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Parser.java new file mode 100644 index 0000000..3c7f4ac --- /dev/null +++ b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Parser.java @@ -0,0 +1,59 @@ +package net.sf.applecommander.bastokenizer; + +import java.util.Objects; +import java.util.Queue; + +import net.sf.applecommander.bastokenizer.Token.Type; + +public class Parser { + private final Queue tokens; + + public Parser(Queue tokens) { + Objects.requireNonNull(tokens); + this.tokens = tokens; + } + + public Program parse() { + Program program = new Program(); + while (!tokens.isEmpty()) { + Line line = readLine(); + program.lines.add(line); + } + return program; + } + + public Line readLine() { + Line line = new Line(expectNumber()); + while (!tokens.isEmpty() && tokens.peek().type != Type.EOL) { + Statement statement = readStatement(); + if (statement != null) { + line.statements.add(statement); + } else { + break; + } + } + if (!tokens.isEmpty() && tokens.peek().type == Type.EOL) { + tokens.remove(); // Skip that EOL + } + return line; + } + + public Statement readStatement() { + Statement statement = new Statement(); + while (!tokens.isEmpty()) { + if (tokens.peek().type == Type.EOL) break; + Token t = tokens.remove(); + if (":".equals(t.text)) break; + statement.tokens.add(t); + } + return statement; + } + + public int expectNumber() { + Token c = tokens.remove(); + if (c.type != Type.NUMBER) { + throw new RuntimeException("Expected a number in line #" + c.line); + } + return c.number.intValue(); + } +} diff --git a/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Program.java b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Program.java new file mode 100644 index 0000000..eb3884b --- /dev/null +++ b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Program.java @@ -0,0 +1,15 @@ +package 
net.sf.applecommander.bastokenizer; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +public class Program { + public final List lines = new ArrayList<>(); + + public void prettyPrint(PrintStream ps) { + for (Line line : lines) { + line.prettyPrint(ps); + } + } +} diff --git a/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Statement.java b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Statement.java new file mode 100644 index 0000000..ac90153 --- /dev/null +++ b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Statement.java @@ -0,0 +1,15 @@ +package net.sf.applecommander.bastokenizer; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +public class Statement { + public final List tokens = new ArrayList<>(); + + public void prettyPrint(PrintStream ps) { + for (Token token : tokens) { + token.prettyPrint(ps); + } + } +} diff --git a/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Token.java b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Token.java new file mode 100644 index 0000000..01b39cf --- /dev/null +++ b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/Token.java @@ -0,0 +1,87 @@ +package net.sf.applecommander.bastokenizer; + +import java.io.PrintStream; + +public class Token { + public final int line; + public final Type type; + public final ApplesoftKeyword keyword; + public final Double number; + public final String text; + + private Token(int line, Type type, ApplesoftKeyword keyword, Double number, String text) { + this.line = line; + this.type = type; + this.keyword = keyword; + this.number = number; + this.text = text; + } + @Override + public String toString() { + switch (type) { + case EOL: + return type.toString(); + case KEYWORD: + return keyword.toString(); + case NUMBER: + return String.format("%s(%f)", type, number); + default: + return String.format("%s(%s)", type, text); + } + } + + 
public void prettyPrint(PrintStream ps) { + switch (type) { + case EOL: + ps.print(""); + break; + case COMMENT: + ps.printf(" REM %s", text); + break; + case STRING: + ps.printf("\"%s\"", text); + break; + case KEYWORD: + ps.printf(" %s ", keyword.text); + break; + case IDENT: + case SYNTAX: + ps.print(text); + break; + case NUMBER: + if (Math.rint(number) == number) { + ps.print(number.intValue()); + } else { + ps.print(number); + } + break; + } + } + + public static Token eol(int line) { + return new Token(line, Type.EOL, null, null, null); + } + public static Token number(int line, Double number) { + return new Token(line, Type.NUMBER, null, number, null); + } + public static Token ident(int line, String text) { + return new Token(line, Type.IDENT, null, null, text); + } + public static Token comment(int line, String text) { + return new Token(line, Type.COMMENT, null, null, text); + } + public static Token string(int line, String text) { + return new Token(line, Type.STRING, null, null, text); + } + public static Token keyword(int line, ApplesoftKeyword keyword) { + // Note that the text component is useful to have for parsing, so we replicate it... 
+ return new Token(line, Type.KEYWORD, keyword, null, keyword.text); + } + public static Token syntax(int line, int ch) { + return new Token(line, Type.SYNTAX, null, null, String.format("%c", ch)); + } + + public static enum Type { + EOL, NUMBER, IDENT, COMMENT, STRING, KEYWORD, SYNTAX + } +} \ No newline at end of file diff --git a/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/TokenReader.java b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/TokenReader.java new file mode 100644 index 0000000..8c78e38 --- /dev/null +++ b/bastokenizer/src/main/java/net/sf/applecommander/bastokenizer/TokenReader.java @@ -0,0 +1,114 @@ +package net.sf.applecommander.bastokenizer; + +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.Reader; +import java.io.StreamTokenizer; +import java.util.LinkedList; +import java.util.Optional; +import java.util.Queue; + +public class TokenReader { + private boolean hasMore = true; + // Internal flag just in case we consume the EOL (see REM for instance)s + private boolean needSyntheticEol = false; + private Reader reader; + private StreamTokenizer tokenizer; + + public static Queue tokenize(String filename) throws FileNotFoundException, IOException { + try (FileReader fileReader = new FileReader(filename)) { + TokenReader tokenReader = new TokenReader(fileReader); + LinkedList tokens = new LinkedList<>(); + while (tokenReader.hasMore()) { + // Magic number: maximum number of pieces from the StreamTokenizer that may be combined. + tokenReader.next(2) + .ifPresent(tokens::add); + } + return tokens; + } + } + + public TokenReader(Reader reader) { + this.reader = reader; + this.tokenizer = ApplesoftKeyword.tokenizer(reader); + } + + public boolean hasMore() { + return hasMore; + } + + public Optional next(int depth) throws IOException { + // A cheesy attempt to prevent too much looping... 
+ if (depth > 0) { + if (this.needSyntheticEol) { + this.needSyntheticEol = false; + int line = tokenizer.lineno(); + return Optional.of(Token.eol(line)); + } + hasMore = tokenizer.nextToken() != StreamTokenizer.TT_EOF; + if (hasMore) { + int line = tokenizer.lineno(); + switch (tokenizer.ttype) { + case StreamTokenizer.TT_EOL: + return Optional.of(Token.eol(line)); + case StreamTokenizer.TT_NUMBER: + return Optional.of(Token.number(line, tokenizer.nval)); + case StreamTokenizer.TT_WORD: + Optional opt = ApplesoftKeyword.find(tokenizer.sval); + if (opt.filter(kw -> kw == ApplesoftKeyword.REM).isPresent()) { + StringBuilder sb = new StringBuilder(); + while (true) { + // Bypass the Tokenizer and just read to EOL for the comment + int ch = reader.read(); + if (ch == '\n') { + // Recover to the newline so that the next token is a EOL + // This is needed for parsing! + this.needSyntheticEol = true; + break; + } + sb.append((char)ch); + } + return Optional.of(Token.comment(line, sb.toString())); + } + // Optional and exceptions don't play well. :-/ + if (opt.isPresent() && opt.get().parts.size() > 1) { + // Pull next token and see if it is the 2nd part ("MID$" == "MID", "$"; checking for the "$") + next(depth-1) + .filter(t -> opt.get().parts.get(1).equals(t.text)) + .orElseThrow(() -> new IOException("Expecting: " + opt.get().parts)); + } + return Optional.of(opt + .map(kw -> Token.keyword(line, kw)) + .orElse(Token.ident(line, tokenizer.sval))); + case '"': + return Optional.of(Token.string(line, tokenizer.sval)); + case '(': + case ')': + case ',': + case ':': + case '$': + case '#': + case ';': + case '&': + case '=': + case '<': + case '>': + case '*': + case '+': + case '-': + case '/': + case '^': + return Optional.of( + ApplesoftKeyword.find(String.format("%c", tokenizer.ttype)) + .map(kw -> Token.keyword(line, kw)) + .orElse(Token.syntax(line, tokenizer.ttype))); + default: + throw new IOException(String.format( + "Unknown! 
ttype=%d, nval=%f, sval=%s\n", tokenizer.ttype, tokenizer.nval, tokenizer.sval)); + } + } + } + return Optional.empty(); + } +} diff --git a/bastokenizer/src/test/resources/gravestone.bas b/bastokenizer/src/test/resources/gravestone.bas new file mode 100755 index 0000000..888a6eb --- /dev/null +++ b/bastokenizer/src/test/resources/gravestone.bas @@ -0,0 +1,22 @@ +10 HGR +20 GOSUB 200 +30 GOSUB 300 +90 END +200 HCOLOR= 1 +210 FOR Y = 90 TO 159 STEP 4 +220 FOR X = 0 TO 279 STEP 8 +230 HPLOT X,Y TO X +3,Y +240 NEXT X,Y +250 FOR Y = 92 TO 159 STEP 4 +260 FOR X = 5 TO 276 STEP 8 +270 HPLOT X,Y TO X +3,Y +280 NEXT X,Y +290 RETURN +300 HCOLOR= 3 +310 FOR D = 0 TO 3.14159 STEP 0.02 +320 X = 139 -35 * COS(D) +330 Y = 90 -20 * SIN(D) +340 HPLOT X,Y TO X,140 +345 IF INT(Y) < > INT(OY) THEN PRINT "(x,y)="; INT(X);","; INT(Y):OY = Y +350 NEXT D +360 RETURN \ No newline at end of file diff --git a/bastokenizer/src/test/resources/jewel-draw1.bas b/bastokenizer/src/test/resources/jewel-draw1.bas new file mode 100755 index 0000000..0c9f272 --- /dev/null +++ b/bastokenizer/src/test/resources/jewel-draw1.bas @@ -0,0 +1,23 @@ +10 HGR : POKE -16302,0 +15 GOSUB 200: REM background +20 D1 = 70:D2 = 77:D3 = 80 +30 DEF FN FX(A) = COS(D) *Z +40 DEF FN FY(A) = SIN(D) *Z +50 FOR D = 0 TO 6.28 STEP 0.01 +55 HCOLOR= 4 +60 Z = 60: GOSUB 100 +80 NEXT D +90 PRINT CHR$(4);"-jewel-draw2" +100 X = FN FX(1):Y = FN FY(1) +110 HPLOT 140,96 TO 140 -X,96 -Y +120 RETURN +200 HCOLOR= 6 +210 FOR Y = 0 TO 191 STEP 4 +220 FOR X = 0 TO 279 STEP 8 +230 HPLOT X,Y TO X +3,Y +240 NEXT X,Y +250 FOR Y = 2 TO 191 STEP 4 +260 FOR X = 3 TO 276 STEP 8 +270 HPLOT X,Y TO X +3,Y +280 NEXT X,Y +290 RETURN diff --git a/bastokenizer/src/test/resources/jewel-draw2.bas b/bastokenizer/src/test/resources/jewel-draw2.bas new file mode 100755 index 0000000..a9d0fc5 --- /dev/null +++ b/bastokenizer/src/test/resources/jewel-draw2.bas @@ -0,0 +1,15 @@ +20 D1 = 60:D2 = 73:D3 = 77 +30 DEF FN FX(A) = COS(D) *Z +40 DEF FN FY(A) = SIN(D) *Z 
+50 FOR D = 0 TO 6.28 STEP 0.01 +55 HCOLOR= 3 +60 Z = 55: GOSUB 100 +65 HCOLOR= 1 +70 Z = 53: GOSUB 100 +80 NEXT D +82 HCOLOR= 7:Z = D3:: FOR D = 0 TO 6.28 STEP 0.1 +83 X = FN FX(1):Y = FN FY(1): HPLOT 140 -D1 * COS(D),96 -D1 * SIN(D) TO 140 -X,96 -Y: NEXT D +90 END +100 X = FN FX(1):Y = FN FY(1) +110 HPLOT 140,96 TO 140 -X,96 -Y +120 RETURN \ No newline at end of file diff --git a/bastokenizer/src/test/resources/test.bas b/bastokenizer/src/test/resources/test.bas new file mode 100644 index 0000000..28f154b --- /dev/null +++ b/bastokenizer/src/test/resources/test.bas @@ -0,0 +1,3 @@ +10 REM BEGIN TEST +20 PRINT "JUST A TEST" +30 REM END TEST