/*
 * phase1 -- strip comments and merge multi-lines.
 *
 */


#include "phase1.h"
#include <stdexcept>
#include <stdint.h>

// Host-side copy of the escape byte; the push_back_escape action stores it
// in scratch.  The grammar defines its own `escape` with the same value.
// NOTE(review): 0xb6 is presumably the MacRoman code for the MPW Shell
// escape character -- confirm.
const unsigned char escape = 0xb6;

/*
 * From experimentation, MPW splits on ';' after variable expansion,
 * whereas this code splits before it.  Something stupid like:
 * set q '"'; echo {q} ; "
 * will not be handled correctly.  Oh well.
 * (We should probably just drop that, and then we can combine tokenizing
 * with variable expansion.)
 */
%%{
	# state machine definition; the input alphabet is unsigned bytes.
	machine main;
	alphtype unsigned char;


	# character classes shared by the rules below:
	#   escape -- the byte 0xb6
	#   ws     -- space or tab
	#   nl     -- '\n' or '\r'
	escape = 0xb6;
	ws = [ \t];
	nl = ('\n' | '\r');

	action add_line {
		/*
		 * end of a logical line: trim trailing whitespace from the text
		 * accumulated in scratch, hand the result to pipe_to (when one is
		 * set), and restart the machine at main for the next line.
		 * lines that end up empty are dropped without calling pipe_to.
		 */

		/*
		 * strip trailing ws.
		 * scratch may hold bytes >= 0x80 (the 0xb6 escape, for one).  where
		 * plain char is signed those are negative, and handing a negative
		 * value to isspace() is undefined behavior -- so go through
		 * unsigned char first.
		 */
		while (!scratch.empty() && isspace(static_cast<unsigned char>(scratch.back())))
			scratch.pop_back();

		if (!scratch.empty()) {
			/* a moved-from string is in an unspecified state; clear() makes it empty again. */
			std::string tmp = std::move(scratch);
			scratch.clear();
			if (pipe_to) pipe_to(std::move(tmp));
		}
		fgoto main;
	}

	action push_back {
		/* keep the current input character unchanged. */
		scratch += static_cast<char>(fc);
	}

	action push_back_escape {
		/* keep the escape byte together with the character it escapes. */
		const char escaped_pair[] = { static_cast<char>(escape), static_cast<char>(fc) };
		scratch.append(escaped_pair, sizeof(escaped_pair));
	}

	# a comment: '#' followed by everything up to, but not including, the
	# newline.  no action is attached, so nothing is copied to scratch.
	comment = '#' (any-nl)*;

	# the escape character followed by one more character:
	#  - a newline: only the line counter is incremented; neither byte is
	#    stored, so the text simply continues.
	#  - anything else: the escape and the character are both copied to
	#    scratch (push_back_escape).
	escape_seq =
		escape
		(
			nl ${ /* esc newline */ line++; }
			|
			(any-nl) $push_back_escape
		)
	;


	# single-quoted string.  only escape \n is special.
	# handling is so stupid I'm not going to support it.
	#
	# as written: an opening ', any run of characters other than newline
	# and ', then the closing '.  every character -- quotes included -- is
	# copied to scratch unchanged.  the escape character gets no special
	# treatment here.  the error action throws the "'s must occur in
	# pairs" runtime_error.
	# NOTE(review): $err trails the closing-quote term; confirm which
	# states it actually ends up embedded in.

	sstring = 
		['] $push_back
		( (any-nl-[']) $push_back )*
		['] $push_back
		$err{
			throw std::runtime_error("### MPW Shell - 's must occur in pairs.");
		}
	;

	# todo -- {{variables}}
	# same quoting logic as ' string
	#
	# a variable reference: '{', any run of characters other than newline
	# and '}', then '}'.  braces and contents are copied to scratch
	# unchanged.  the error action throws the "{s must occur in pairs"
	# runtime_error.
	vstring = 
		'{' $push_back
		( (any-nl-'}') $push_back )*
		'}' $push_back
		$err{
			throw std::runtime_error("### MPW Shell - {s must occur in pairs.");
		}
	;


	# double-quoted string.
	# escape \n is ignored.  others do nothing.
	#
	# between the quotes the body is any mix of:
	#  - escape_seq (escape + newline is dropped; any other escape pair is
	#    copied through to scratch),
	#  - vstring (copied through unchanged),
	#  - any other character except newline, copied to scratch.
	# both quotes are copied to scratch as well.  the error action throws
	# the double-quote "must occur in pairs" runtime_error.
	dstring =
		["] $push_back
		(
			escape_seq
			|
			vstring
			|
			(any-escape-nl-["{]) $push_back
		)* ["] $push_back
		$err{
			throw std::runtime_error("### MPW Shell - \"s must occur in pairs.");
		}
	;

	# gobble up all the white space...
	# the ws characters themselves are never stored.  the leaving action
	# appends a single ' ' to scratch, unless scratch is empty or already
	# ends in ' ' -- so a run of blanks and tabs collapses to one space.
	coalesce_ws =
		ws+
		<:
		''
		%{ if (!scratch.empty() && scratch.back() != ' ') scratch.push_back(' '); }
		;

	# one logical line: any sequence of quoted strings, variable
	# references, escape sequences, whitespace runs and ordinary
	# characters, then an optional comment and the terminating newline.
	# the last alternative excludes every character that starts one of the
	# other alternatives (or a comment), and copies the rest to scratch.
	# the terminating newline increments the line counter and runs
	# add_line.
	line :=
	(
		sstring
		|
		dstring
		|
		vstring
		|
		escape_seq
		|
		coalesce_ws
		|
		(any-escape-nl-ws-[#'"{]) $push_back
	)*
	comment?
	nl ${ line++; } $add_line
	;

	# entry point for each line.  leading whitespace is consumed without
	# being stored.  the first other character is not consumed here:
	# fhold backs up over it and fgoto hands control to line, which then
	# sees that character.
	main :=
		# strip leading whitespace.
		ws*
		<: # left guard -- higher priority to ws.
		any ${ fhold; fgoto line; }
	;

}%%



namespace {
	// The data generated for the state machine is emitted here, inside an
	// anonymous namespace.
	%% write data;	
}

// Construct the lexer; the generated initialization code for the state
// machine is emitted in the body.
phase1::phase1() {
	%% write init;
}

// Return the lexer to its initial condition: discard any partially
// accumulated line and re-run the generated state machine initialization.
void phase1::reset() {
	scratch.clear();
	%% write init;
	// open question: should line be set back to 1 here?
}

/*
 * Run one chunk of input, [begin, end), through the state machine.
 *
 * The machine state (cs) is not local to this function, so a line may be
 * split across several calls.  Completed lines are delivered by the
 * add_line action.
 *
 * begin, end -- the bytes to scan.
 * final      -- true when this is the last chunk; eof is then set to the
 *               end of the chunk, otherwise it stays null.
 *
 * Throws std::runtime_error if the machine ends up in its error state.
 * The error actions embedded in the grammar throw std::runtime_error too.
 */
void phase1::process(const unsigned char *begin, const unsigned char *end, bool final) {

	// p, pe and eof are the variable names the generated code works with.
	const unsigned char *p = begin;
	const unsigned char *pe = end;
	const unsigned char *eof = final ? pe : nullptr;

	%% write exec;

	if (cs == main_error) {
		throw std::runtime_error("MPW Shell - Lexer error.");
	}

#if 0
	if (cs != main_start && final) {
		// will this happen?
		throw std::runtime_error("MPW Shell - Lexer error.");
	}
#endif
}