bastools/api/src/main/java/io/github/applecommander/bastokenizer/api/TokenReader.java

package io.github.applecommander.bastokenizer.api;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.LinkedList;
import java.util.Optional;
import java.util.Queue;

import io.github.applecommander.bastokenizer.api.model.ApplesoftKeyword;
import io.github.applecommander.bastokenizer.api.model.Token;

/**
 * The TokenReader, given a text file, generates a series of Tokens (in the compiler sense, 
 * not AppleSoft) for the AppleSoft program.
 * <p>
 * Use one of the static {@code tokenize} methods to read a whole source, or construct a
 * TokenReader and call {@link #next(int)} while {@link #hasMore()} returns {@code true}.
 *  
 * @author rob
 */
public class TokenReader {
	private boolean hasMore = true;
	// Internal flag just in case we consume the EOL (see REM for instance)
	private boolean needSyntheticEol = false;
	private final Reader reader;
	private final StreamTokenizer tokenizer;

	/** A handy method to generate a list of Tokens from a file name. */
	public static Queue<Token> tokenize(String filename) throws FileNotFoundException, IOException {
		try (FileReader fileReader = new FileReader(filename)) {
			return tokenize(fileReader);
		}
	}
	/** A handy method to generate a list of Tokens from a file. */
	public static Queue<Token> tokenize(File file) throws FileNotFoundException, IOException {
		try (FileReader fileReader = new FileReader(file)) {
			return tokenize(fileReader);
		}
	}
	/** A handy method to generate a list of Tokens from an InputStream. */
	public static Queue<Token> tokenize(InputStream inputStream) throws IOException {
		try (InputStreamReader streamReader = new InputStreamReader(inputStream)) {
			return tokenize(streamReader);
		}
	}
	/** Drains the given reader, collecting every Token produced, in order. */
	private static Queue<Token> tokenize(Reader reader) throws IOException {
		TokenReader tokenReader = new TokenReader(reader);
		LinkedList<Token> tokens = new LinkedList<>();
		while (tokenReader.hasMore()) {
			// Magic number: maximum number of pieces from the StreamTokenizer that may be combined.
			tokenReader.next(2)
					   .ifPresent(tokens::add);
		}
		return tokens;
	}

	/**
	 * Create a TokenReader over the given reader.  The reader is shared with the
	 * StreamTokenizer built by {@code ApplesoftKeyword.tokenizer(reader)} and is also read
	 * directly when a REM comment is encountered.
	 */
	public TokenReader(Reader reader) {
		this.reader = reader;
		this.tokenizer = ApplesoftKeyword.tokenizer(reader);
	}
	
	/**
	 * Answers {@code true} until a call to {@link #next(int)} has hit the end of the input.
	 * Note that this starts out {@code true}, even for an empty input.
	 */
	public boolean hasMore() {
		return hasMore;
	}
	
	/**
	 * Read the next Token.
	 * 
	 * @param depth the number of StreamTokenizer pieces that may still be combined into one
	 *              Token; when it is not positive nothing is read and an empty Optional is returned
	 * @return the next Token, or an empty Optional at end of input (or when depth is exhausted)
	 * @throws IOException if reading fails, if the second part of a multi-part keyword is
	 *                     missing, or if the tokenizer reports an unrecognized token type
	 */
	public Optional<Token> next(int depth) throws IOException {
		// A cheesy attempt to prevent too much looping...
		if (depth > 0) {
			if (this.needSyntheticEol) {
				// The REM handling swallowed the newline; hand out the EOL it owes us.
				this.needSyntheticEol = false;
				int line = tokenizer.lineno();
				return Optional.of(Token.eol(line));
			}
			hasMore = tokenizer.nextToken() != StreamTokenizer.TT_EOF;
			if (hasMore) {
				int line = tokenizer.lineno();
				switch (tokenizer.ttype) {
				case StreamTokenizer.TT_EOL:
					return Optional.of(Token.eol(line));
				case StreamTokenizer.TT_NUMBER:
					return Optional.of(Token.number(line, tokenizer.nval));
				case StreamTokenizer.TT_WORD:
					Optional<ApplesoftKeyword> opt = ApplesoftKeyword.find(tokenizer.sval);
					// REM is special
					if (opt.filter(kw -> kw == ApplesoftKeyword.REM).isPresent()) {
						return Optional.of(Token.comment(line, readCommentText()));
					}
					// If we found an Applesoft token, handle it
					if (opt.isPresent()) {
						if (opt.get().parts.size() > 1) {
							// Pull next token and see if it is the 2nd part ("PR#" == "PR", "#"; checking for the "#")
							next(depth-1)
								.filter(t -> opt.get().parts.get(1).equals(t.text))
							    .orElseThrow(() -> new IOException("Expecting: " + opt.get().parts));
						}
						return Optional.of(Token.keyword(line, opt.get()));
					}
					// Check if we found a directive
					if (tokenizer.sval.startsWith("$")) {
						return Optional.of(Token.directive(line, tokenizer.sval));
					}
					// Found an identifier (A, A$, A%).  Test if it is an array ('A(', 'A$(', 'A%(').
					String sval = tokenizer.sval;
					tokenizer.nextToken();
					if (tokenizer.ttype == '(') {
						sval += (char)tokenizer.ttype;
					} else {
						// Not an array; leave the token for the next call.
						tokenizer.pushBack();
					}
					return Optional.of(Token.ident(line, sval));
				case '"':
					return Optional.of(Token.string(line, tokenizer.sval));
				case '(':
				case ')':
				case ',':
				case ':':
				case '$':
				case '#':
				case ';':
				case '&':
				case '=':
				case '<':
				case '>':
				case '*':
				case '+':
				case '-':
				case '/':
				case '^':
					// Single characters may be Applesoft keywords (operators); otherwise plain syntax.
					return Optional.of(
							ApplesoftKeyword.find(String.format("%c", tokenizer.ttype))
							   .map(kw -> Token.keyword(line, kw))
							   .orElseGet(() -> Token.syntax(line, tokenizer.ttype)));
				default:
					throw new IOException(String.format(
						"Unknown! ttype=%d, nval=%f, sval=%s\n", tokenizer.ttype, tokenizer.nval, tokenizer.sval));
				}
			}
		}
		return Optional.empty();
	}

	/**
	 * Reads the remainder of a REM line straight from the reader, bypassing the tokenizer.
	 * Stops at the newline (which is consumed, so a synthetic EOL is scheduled for the next
	 * call to {@link #next(int)}) or at the end of the stream.  Without the end-of-stream
	 * check a REM on a final line lacking a newline would loop forever.
	 */
	private String readCommentText() throws IOException {
		StringBuilder sb = new StringBuilder();
		int ch;
		// Reader.read() answers -1 once the stream is exhausted.
		while ((ch = reader.read()) != -1) {
			if (ch == '\n') {
				// Recover to the newline so that the next token is a EOL
				// This is needed for parsing!
				this.needSyntheticEol = true;
				break;
			}
			sb.append((char)ch);
		}
		return sb.toString();
	}
}
Migrated to Gradle; spearated into 'api' and 'tools/bt' projects. Closes #15. 2018-05-29 03:33:41 +00:00			`package io.github.applecommander.bastokenizer.api;`
Initial commit. 2018-05-06 19:31:36 +00:00
Adding command-line. Closes #6. 2018-05-13 20:41:23 +00:00			`import java.io.File;`
Initial commit. 2018-05-06 19:31:36 +00:00			`import java.io.FileNotFoundException;`
			`import java.io.FileReader;`
			`import java.io.IOException;`
Pulling in AppleCommander tweaks. 2018-05-13 16:55:36 +00:00			`import java.io.InputStream;`
			`import java.io.InputStreamReader;`
Initial commit. 2018-05-06 19:31:36 +00:00			`import java.io.Reader;`
			`import java.io.StreamTokenizer;`
			`import java.util.LinkedList;`
			`import java.util.Optional;`
			`import java.util.Queue;`

Migrated to Gradle; spearated into 'api' and 'tools/bt' projects. Closes #15. 2018-05-29 03:33:41 +00:00			`import io.github.applecommander.bastokenizer.api.model.ApplesoftKeyword;`
			`import io.github.applecommander.bastokenizer.api.model.Token;`

Adding comments. 2018-05-09 01:25:09 +00:00			`/**`
			`* The TokenReader, given a text file, generates a series of Tokens (in the compiler sense,`
			`* not AppleSoft) for the AppleSoft program.`
			`*`
			`* @author rob`
			`*/`
Initial commit. 2018-05-06 19:31:36 +00:00			`public class TokenReader {`
			`private boolean hasMore = true;`
			`// Internal flag just in case we consume the EOL (see REM for instance)s`
			`private boolean needSyntheticEol = false;`
			`private Reader reader;`
			`private StreamTokenizer tokenizer;`

Adding command-line. Closes #6. 2018-05-13 20:41:23 +00:00			`/** A handy method to generate a list of Tokens from a file name. */`
Initial commit. 2018-05-06 19:31:36 +00:00			`public static Queue<Token> tokenize(String filename) throws FileNotFoundException, IOException {`
			`try (FileReader fileReader = new FileReader(filename)) {`
Pulling in AppleCommander tweaks. 2018-05-13 16:55:36 +00:00			`return tokenize(fileReader);`
			`}`
			`}`
Adding command-line. Closes #6. 2018-05-13 20:41:23 +00:00			`/** A handy method to generate a list of Tokens from a file. */`
			`public static Queue<Token> tokenize(File file) throws FileNotFoundException, IOException {`
			`try (FileReader fileReader = new FileReader(file)) {`
			`return tokenize(fileReader);`
			`}`
			`}`
Pulling in AppleCommander tweaks. 2018-05-13 16:55:36 +00:00			`/** A handy method to generate a list of Tokens from an InputStream. */`
			`public static Queue<Token> tokenize(InputStream inputStream) throws IOException {`
			`try (InputStreamReader streamReader = new InputStreamReader(inputStream)) {`
			`return tokenize(streamReader);`
			`}`
			`}`
			`private static Queue<Token> tokenize(Reader reader) throws IOException {`
			`TokenReader tokenReader = new TokenReader(reader);`
			`LinkedList<Token> tokens = new LinkedList<>();`
			`while (tokenReader.hasMore()) {`
			`// Magic number: maximum number of pieces from the StreamTokenizer that may be combined.`
			`tokenReader.next(2)`
			`.ifPresent(tokens::add);`
Initial commit. 2018-05-06 19:31:36 +00:00			`}`
Pulling in AppleCommander tweaks. 2018-05-13 16:55:36 +00:00			`return tokens;`
Initial commit. 2018-05-06 19:31:36 +00:00			`}`

			`public TokenReader(Reader reader) {`
			`this.reader = reader;`
			`this.tokenizer = ApplesoftKeyword.tokenizer(reader);`
			`}`

			`public boolean hasMore() {`
			`return hasMore;`
			`}`

			`public Optional<Token> next(int depth) throws IOException {`
			`// A cheesy attempt to prevent too much looping...`
			`if (depth > 0) {`
			`if (this.needSyntheticEol) {`
			`this.needSyntheticEol = false;`
			`int line = tokenizer.lineno();`
			`return Optional.of(Token.eol(line));`
			`}`
			`hasMore = tokenizer.nextToken() != StreamTokenizer.TT_EOF;`
			`if (hasMore) {`
			`int line = tokenizer.lineno();`
			`switch (tokenizer.ttype) {`
			`case StreamTokenizer.TT_EOL:`
			`return Optional.of(Token.eol(line));`
			`case StreamTokenizer.TT_NUMBER:`
			`return Optional.of(Token.number(line, tokenizer.nval));`
			`case StreamTokenizer.TT_WORD:`
			`Optional<ApplesoftKeyword> opt = ApplesoftKeyword.find(tokenizer.sval);`
Updated what defines a "word" to simplify logic a bit. Closes #10. 2018-05-19 15:39:47 +00:00			`// REM is special`
Initial commit. 2018-05-06 19:31:36 +00:00			`if (opt.filter(kw -> kw == ApplesoftKeyword.REM).isPresent()) {`
			`StringBuilder sb = new StringBuilder();`
			`while (true) {`
			`// Bypass the Tokenizer and just read to EOL for the comment`
			`int ch = reader.read();`
			`if (ch == '\n') {`
			`// Recover to the newline so that the next token is a EOL`
			`// This is needed for parsing!`
			`this.needSyntheticEol = true;`
			`break;`
			`}`
			`sb.append((char)ch);`
			`}`
			`return Optional.of(Token.comment(line, sb.toString()));`
			`}`
Updated what defines a "word" to simplify logic a bit. Closes #10. 2018-05-19 15:39:47 +00:00			`// If we found an Applesoft token, handle it`
Beginning with variable report. Need to handle arrays yet. #10. 2018-05-18 02:02:13 +00:00			`if (opt.isPresent()) {`
			`if (opt.get().parts.size() > 1) {`
Updated what defines a "word" to simplify logic a bit. Closes #10. 2018-05-19 15:39:47 +00:00			`// Pull next token and see if it is the 2nd part ("PR#" == "PR", "#"; checking for the "#")`
Beginning with variable report. Need to handle arrays yet. #10. 2018-05-18 02:02:13 +00:00			`next(depth-1)`
			`.filter(t -> opt.get().parts.get(1).equals(t.text))`
			`.orElseThrow(() -> new IOException("Expecting: " + opt.get().parts));`
			`}`
Bypassed bug somewhat. If two parts to a token were found, the Optional logic was not correct. Instead of fixing, just returned the Token from the if statement. Closes #9. 2018-05-16 03:22:03 +00:00			`return Optional.of(Token.keyword(line, opt.get()));`
Adding directives and the '$embed' directive. Closes #11. 2018-05-21 00:54:11 +00:00			`}`
			`// Check if we found a directive`
			`if (tokenizer.sval.startsWith("$")) {`
			`return Optional.of(Token.directive(line, tokenizer.sval));`
			`}`
			`// Found an identifier (A, A$, A%). Test if it is an array ('A(', 'A$(', 'A%(').`
			`String sval = tokenizer.sval;`
			`tokenizer.nextToken();`
			`if (tokenizer.ttype == '(') {`
			`sval += (char)tokenizer.ttype;`
Beginning with variable report. Need to handle arrays yet. #10. 2018-05-18 02:02:13 +00:00			`} else {`
Adding directives and the '$embed' directive. Closes #11. 2018-05-21 00:54:11 +00:00			`tokenizer.pushBack();`
Initial commit. 2018-05-06 19:31:36 +00:00			`}`
Adding directives and the '$embed' directive. Closes #11. 2018-05-21 00:54:11 +00:00			`return Optional.of(Token.ident(line, sval));`
Initial commit. 2018-05-06 19:31:36 +00:00			`case '"':`
			`return Optional.of(Token.string(line, tokenizer.sval));`
			`case '(':`
			`case ')':`
			`case ',':`
			`case ':':`
			`case '$':`
			`case '#':`
			`case ';':`
			`case '&':`
			`case '=':`
			`case '<':`
			`case '>':`
			`case '*':`
			`case '+':`
			`case '-':`
			`case '/':`
			`case '^':`
			`return Optional.of(`
			`ApplesoftKeyword.find(String.format("%c", tokenizer.ttype))`
			`.map(kw -> Token.keyword(line, kw))`
			`.orElse(Token.syntax(line, tokenizer.ttype)));`
			`default:`
			`throw new IOException(String.format(`
			`"Unknown! ttype=%d, nval=%f, sval=%s\n", tokenizer.ttype, tokenizer.nval, tokenizer.sval));`
			`}`
			`}`
			`}`
			`return Optional.empty();`
			`}`
			`}`