2018-05-29 03:33:41 +00:00
|
|
|
package io.github.applecommander.bastokenizer.api;
|
2018-05-06 19:31:36 +00:00
|
|
|
|
2018-05-13 20:41:23 +00:00
|
|
|
import java.io.File;
|
2018-05-06 19:31:36 +00:00
|
|
|
import java.io.FileNotFoundException;
|
|
|
|
import java.io.FileReader;
|
|
|
|
import java.io.IOException;
|
2018-05-13 16:55:36 +00:00
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.InputStreamReader;
|
2018-05-06 19:31:36 +00:00
|
|
|
import java.io.Reader;
|
|
|
|
import java.io.StreamTokenizer;
|
|
|
|
import java.util.LinkedList;
|
|
|
|
import java.util.Optional;
|
|
|
|
import java.util.Queue;
|
|
|
|
|
2018-05-29 03:33:41 +00:00
|
|
|
import io.github.applecommander.bastokenizer.api.model.ApplesoftKeyword;
|
|
|
|
import io.github.applecommander.bastokenizer.api.model.Token;
|
|
|
|
|
2018-05-09 01:25:09 +00:00
|
|
|
/**
|
|
|
|
* The TokenReader, given a text file, generates a series of Tokens (in the compiler sense,
|
|
|
|
* not AppleSoft) for the AppleSoft program.
|
|
|
|
*
|
|
|
|
* @author rob
|
|
|
|
*/
|
2018-05-06 19:31:36 +00:00
|
|
|
public class TokenReader {
|
|
|
|
private boolean hasMore = true;
|
|
|
|
// Internal flag just in case we consume the EOL (see REM for instance)s
|
|
|
|
private boolean needSyntheticEol = false;
|
|
|
|
private Reader reader;
|
|
|
|
private StreamTokenizer tokenizer;
|
|
|
|
|
2018-05-13 20:41:23 +00:00
|
|
|
/** A handy method to generate a list of Tokens from a file name. */
|
2018-05-06 19:31:36 +00:00
|
|
|
public static Queue<Token> tokenize(String filename) throws FileNotFoundException, IOException {
|
|
|
|
try (FileReader fileReader = new FileReader(filename)) {
|
2018-05-13 16:55:36 +00:00
|
|
|
return tokenize(fileReader);
|
|
|
|
}
|
|
|
|
}
|
2018-05-13 20:41:23 +00:00
|
|
|
/** A handy method to generate a list of Tokens from a file. */
|
|
|
|
public static Queue<Token> tokenize(File file) throws FileNotFoundException, IOException {
|
|
|
|
try (FileReader fileReader = new FileReader(file)) {
|
|
|
|
return tokenize(fileReader);
|
|
|
|
}
|
|
|
|
}
|
2018-05-13 16:55:36 +00:00
|
|
|
/** A handy method to generate a list of Tokens from an InputStream. */
|
|
|
|
public static Queue<Token> tokenize(InputStream inputStream) throws IOException {
|
|
|
|
try (InputStreamReader streamReader = new InputStreamReader(inputStream)) {
|
|
|
|
return tokenize(streamReader);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
private static Queue<Token> tokenize(Reader reader) throws IOException {
|
|
|
|
TokenReader tokenReader = new TokenReader(reader);
|
|
|
|
LinkedList<Token> tokens = new LinkedList<>();
|
|
|
|
while (tokenReader.hasMore()) {
|
|
|
|
// Magic number: maximum number of pieces from the StreamTokenizer that may be combined.
|
|
|
|
tokenReader.next(2)
|
|
|
|
.ifPresent(tokens::add);
|
2018-05-06 19:31:36 +00:00
|
|
|
}
|
2018-05-13 16:55:36 +00:00
|
|
|
return tokens;
|
2018-05-06 19:31:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
public TokenReader(Reader reader) {
|
|
|
|
this.reader = reader;
|
|
|
|
this.tokenizer = ApplesoftKeyword.tokenizer(reader);
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean hasMore() {
|
|
|
|
return hasMore;
|
|
|
|
}
|
|
|
|
|
|
|
|
public Optional<Token> next(int depth) throws IOException {
|
|
|
|
// A cheesy attempt to prevent too much looping...
|
|
|
|
if (depth > 0) {
|
|
|
|
if (this.needSyntheticEol) {
|
|
|
|
this.needSyntheticEol = false;
|
|
|
|
int line = tokenizer.lineno();
|
|
|
|
return Optional.of(Token.eol(line));
|
|
|
|
}
|
|
|
|
hasMore = tokenizer.nextToken() != StreamTokenizer.TT_EOF;
|
|
|
|
if (hasMore) {
|
|
|
|
int line = tokenizer.lineno();
|
|
|
|
switch (tokenizer.ttype) {
|
|
|
|
case StreamTokenizer.TT_EOL:
|
|
|
|
return Optional.of(Token.eol(line));
|
|
|
|
case StreamTokenizer.TT_NUMBER:
|
|
|
|
return Optional.of(Token.number(line, tokenizer.nval));
|
|
|
|
case StreamTokenizer.TT_WORD:
|
|
|
|
Optional<ApplesoftKeyword> opt = ApplesoftKeyword.find(tokenizer.sval);
|
2018-05-19 15:39:47 +00:00
|
|
|
// REM is special
|
2018-05-06 19:31:36 +00:00
|
|
|
if (opt.filter(kw -> kw == ApplesoftKeyword.REM).isPresent()) {
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
|
|
while (true) {
|
|
|
|
// Bypass the Tokenizer and just read to EOL for the comment
|
|
|
|
int ch = reader.read();
|
|
|
|
if (ch == '\n') {
|
|
|
|
// Recover to the newline so that the next token is a EOL
|
|
|
|
// This is needed for parsing!
|
|
|
|
this.needSyntheticEol = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
sb.append((char)ch);
|
|
|
|
}
|
|
|
|
return Optional.of(Token.comment(line, sb.toString()));
|
|
|
|
}
|
2018-05-19 15:39:47 +00:00
|
|
|
// If we found an Applesoft token, handle it
|
2018-05-18 02:02:13 +00:00
|
|
|
if (opt.isPresent()) {
|
|
|
|
if (opt.get().parts.size() > 1) {
|
2018-05-19 15:39:47 +00:00
|
|
|
// Pull next token and see if it is the 2nd part ("PR#" == "PR", "#"; checking for the "#")
|
2018-05-18 02:02:13 +00:00
|
|
|
next(depth-1)
|
|
|
|
.filter(t -> opt.get().parts.get(1).equals(t.text))
|
|
|
|
.orElseThrow(() -> new IOException("Expecting: " + opt.get().parts));
|
|
|
|
}
|
2018-05-16 03:22:03 +00:00
|
|
|
return Optional.of(Token.keyword(line, opt.get()));
|
2018-05-21 00:54:11 +00:00
|
|
|
}
|
|
|
|
// Check if we found a directive
|
|
|
|
if (tokenizer.sval.startsWith("$")) {
|
|
|
|
return Optional.of(Token.directive(line, tokenizer.sval));
|
|
|
|
}
|
|
|
|
// Found an identifier (A, A$, A%). Test if it is an array ('A(', 'A$(', 'A%(').
|
|
|
|
String sval = tokenizer.sval;
|
|
|
|
tokenizer.nextToken();
|
|
|
|
if (tokenizer.ttype == '(') {
|
|
|
|
sval += (char)tokenizer.ttype;
|
2018-05-18 02:02:13 +00:00
|
|
|
} else {
|
2018-05-21 00:54:11 +00:00
|
|
|
tokenizer.pushBack();
|
2018-05-06 19:31:36 +00:00
|
|
|
}
|
2018-05-21 00:54:11 +00:00
|
|
|
return Optional.of(Token.ident(line, sval));
|
2018-05-06 19:31:36 +00:00
|
|
|
case '"':
|
|
|
|
return Optional.of(Token.string(line, tokenizer.sval));
|
|
|
|
case '(':
|
|
|
|
case ')':
|
|
|
|
case ',':
|
|
|
|
case ':':
|
|
|
|
case '$':
|
|
|
|
case '#':
|
|
|
|
case ';':
|
|
|
|
case '&':
|
|
|
|
case '=':
|
|
|
|
case '<':
|
|
|
|
case '>':
|
|
|
|
case '*':
|
|
|
|
case '+':
|
|
|
|
case '-':
|
|
|
|
case '/':
|
|
|
|
case '^':
|
|
|
|
return Optional.of(
|
|
|
|
ApplesoftKeyword.find(String.format("%c", tokenizer.ttype))
|
|
|
|
.map(kw -> Token.keyword(line, kw))
|
|
|
|
.orElse(Token.syntax(line, tokenizer.ttype)));
|
|
|
|
default:
|
|
|
|
throw new IOException(String.format(
|
|
|
|
"Unknown! ttype=%d, nval=%f, sval=%s\n", tokenizer.ttype, tokenizer.nval, tokenizer.sval));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Optional.empty();
|
|
|
|
}
|
|
|
|
}
|