Modified the parser to work on the entire input

Previously it was going line by line, but that makes it hard to
properly parse multiline comments, so I modified it to include line
terminators in the token stream.  I also added parsing of /* ... */
block comments and of comments that run from a "|" character to the
end of the line.  There is still a problem with line numbers
in the post-parsing phases, but they seem correct in the parser/lexer
stage.  It's still not able to parse the syscall.s file from Computie,
but the remaining failures are mostly named constants preceded by a
"#" or "-" character.  As for the encoding stage, it has a problem
with a move instruction that uses a label.
This commit is contained in:
transistor 2022-05-15 20:44:36 -07:00
parent 586a16509f
commit 62a484d317
3 changed files with 138 additions and 46 deletions

View File

@ -11,12 +11,17 @@ fn main() {
let filename = env::args().nth(1).unwrap();
let text = fs::read_to_string(filename).unwrap();
let words = assembler.assemble_words(&text).unwrap();
println!("Output:");
for word in words.iter() {
print!("{:04x} ", word);
}
println!("");
match assembler.assemble_words(&text) {
Ok(words) => {
println!("Output:");
for word in words.iter() {
print!("{:04x} ", word);
}
println!("");
},
Err(err) => {
println!("{}", err.msg);
},
};
}

View File

@ -31,6 +31,7 @@ pub enum Disallow {
NoARegImmediateOrPC = 0x0702,
NoRegsPrePostOrImmediate = 0x011B,
NoImmediateOrPC = 0x0700,
OnlyAReg = 0x07FD,
}
impl Disallow {
@ -113,18 +114,8 @@ impl M68kAssembler {
}
fn parse(&mut self, text: &str) -> Result<Vec<(usize, AssemblyLine)>, Error> {
let mut output = vec![];
let iter = text.split_terminator("\n");
for (lineno, line_text) in iter.enumerate() {
let mut parser = AssemblyParser::new(lineno, line_text);
let parsed_line = parser.parse_line()?;
if let Some(line) = parsed_line {
output.push((lineno, line));
}
}
Ok(output)
let mut parser = AssemblyParser::new(text);
parser.parse()
}
fn apply_relocations(&mut self) -> Result<(), Error> {
@ -212,12 +203,18 @@ impl M68kAssembler {
let operation_size = get_size_from_mneumonic(mneumonic).ok_or_else(|| Error::new(&format!("error at line {}: expected a size specifier (b/w/l)", lineno)));
match &mneumonic[..mneumonic.len() - 1] {
"addi" | "addai" => {
"addi" => {
self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
},
"add" | "adda" => {
"addai" => {
self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::OnlyAReg)?;
},
"add" => {
self.convert_common_dreg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?;
},
"adda" => {
self.convert_common_areg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?;
},
"andi" => {
if !self.check_convert_flags_instruction(lineno, 0x23C, 0x27C, args)? {
self.convert_common_immediate_instruction(lineno, 0x0200, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
@ -291,12 +288,18 @@ impl M68kAssembler {
self.convert_common_shift_instruction(lineno, mneumonic, 0xE010, args, operation_size?)?;
},
"subi" | "subai" => {
"subi" => {
self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
},
"sub" | "suba" => {
"subai" => {
self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::OnlyAReg)?;
},
"sub" => {
self.convert_common_dreg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?;
},
"suba" => {
self.convert_common_areg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?;
},
// TODO complete remaining instructions
_ => return Err(Error::new(&format!("unrecognized instruction at line {}: {:?}", lineno, mneumonic))),
@ -315,8 +318,16 @@ impl M68kAssembler {
}
/// Encodes a common two-operand register instruction, using the
/// `Disallow::NoAReg` restriction for the register operand.
///
/// Thin wrapper: every argument is forwarded unchanged to
/// `convert_common_reg_instruction`, with `Disallow::NoAReg` supplied as its
/// `disallow_reg` argument. In the visible dispatch this is the path taken by
/// the "add" and "sub" mnemonics.
///
/// NOTE(review): `NoAReg` presumably rejects address registers as the register
/// operand -- confirm against `Disallow::check`.
///
/// Returns whatever `convert_common_reg_instruction` returns.
fn convert_common_dreg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> {
self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoAReg)
}
/// Encodes a common two-operand register instruction, using the
/// `Disallow::NoDReg` restriction for the register operand.
///
/// Thin wrapper: every argument is forwarded unchanged to
/// `convert_common_reg_instruction`, with `Disallow::NoDReg` supplied as its
/// `disallow_reg` argument. In the visible dispatch this is the path taken by
/// the "adda" and "suba" mnemonics.
///
/// NOTE(review): `NoDReg` presumably rejects data registers as the register
/// operand -- confirm against `Disallow::check`.
///
/// Returns whatever `convert_common_reg_instruction` returns.
fn convert_common_areg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> {
self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoDReg)
}
fn convert_common_reg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow, disallow_reg: Disallow) -> Result<(), Error> {
expect_args(lineno, args, 2)?;
let (direction, reg, operand) = convert_reg_and_other(lineno, args, Disallow::NoAReg)?;
let (direction, reg, operand) = convert_reg_and_other(lineno, args, disallow_reg)?;
let (effective_address, additional_words) = convert_target(lineno, operand, operation_size, disallow)?;
self.output.push(opcode | encode_size(operation_size) | direction | (reg << 9) | effective_address);
self.output.extend(additional_words);
@ -419,12 +430,13 @@ fn convert_target(lineno: usize, operand: &AssemblyOperand, size: Size, disallow
if name.starts_with("a") {
let reg = expect_reg_num(lineno, name)?;
return Ok(((0b100 << 3) | reg, vec![]));
} else if name == "sp" {
return Ok((0b100111, vec![]));
}
}
}
Err(Error::new(&format!("error at line {}: pre-decrement operator can only be used with a single address register", lineno)))
},
// TODO complete remaining types
_ => Err(Error::new(&format!("not implemented: {:?}", operand))),
}
}
@ -474,7 +486,19 @@ fn convert_indirect(lineno: usize, args: &[AssemblyOperand], disallow: Disallow)
Ok(((0b101 << 3) | reg, convert_immediate(lineno, *offset, Size::Word)?))
}
},
// TODO add the index register mode
&[AssemblyOperand::Immediate(offset), AssemblyOperand::Register(name), AssemblyOperand::Register(index)] => {
let index_reg = expect_reg_num(lineno, index)?;
let da_select = if index.starts_with("a") { 1 << 15 } else { 0 };
if name == "pc" {
disallow.check(lineno, Disallow::NoPCRelativeIndex)?;
Ok((0b111011, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)]))
} else {
disallow.check(lineno, Disallow::NoIndirectIndexReg)?;
let reg = expect_address_reg_num(lineno, name)?;
Ok(((0b110 << 3) | reg, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)]))
}
},
// TODO add the MC68020 address options
_ => {
Err(Error::new(&format!("error at line {}: expected valid indirect addressing mode, but found {:?}", lineno, args)))
}
@ -498,14 +522,14 @@ fn convert_reg_and_other<'a>(lineno: usize, args: &'a [AssemblyOperand], disallo
fn convert_immediate(lineno: usize, value: usize, size: Size) -> Result<Vec<u16>, Error> {
match size {
Size::Byte => {
if value < u8::MAX as usize {
if value <= u8::MAX as usize {
Ok(vec![value as u16])
} else {
Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u8::MAX, value)))
}
},
Size::Word => {
if value < u16::MAX as usize {
if value <= u16::MAX as usize {
Ok(vec![value as u16])
} else {
Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u16::MAX, value)))

View File

@ -28,16 +28,32 @@ pub struct AssemblyParser<'input> {
}
impl<'input> AssemblyParser<'input> {
pub fn new(lineno: usize, input: &'input str) -> Self {
pub fn new(input: &'input str) -> Self {
Self {
lexer: AssemblyLexer::new(lineno, input),
lexer: AssemblyLexer::new(input),
}
}
pub fn parse_line(&mut self) -> Result<Option<AssemblyLine>, Error> {
let token = match self.lexer.get_next() {
Some(token) => token,
None => return Ok(None),
/// Parses the entire input held by the lexer.
///
/// Returns every parsed assembly line paired with the line number the lexer
/// reported immediately before that line was parsed. Parsing stops when
/// `parse_line` yields `None`; any error from `parse_line` is propagated.
pub fn parse(&mut self) -> Result<Vec<(usize, AssemblyLine)>, Error> {
    let mut lines = Vec::new();
    loop {
        // Sample the line number first: parsing the line advances the lexer.
        let lineno = self.lexer.get_next_lineno();
        match self.parse_line()? {
            Some(line) => lines.push((lineno, line)),
            None => break,
        }
    }
    Ok(lines)
}
fn parse_line(&mut self) -> Result<Option<AssemblyLine>, Error> {
let token = loop {
match self.lexer.get_next() {
Some(token) if token == "\n" => { },
Some(token) => { break token; }
None => { return Ok(None); },
}
};
let result = match token.as_str() {
@ -67,12 +83,21 @@ impl<'input> AssemblyParser<'input> {
fn parse_list_of_words(&mut self) -> Result<Vec<String>, Error> {
let mut list = vec![];
// If we're already at the end of the line, then it's an empty list, so return
let next = self.lexer.peek();
if next.is_none() || next.as_ref().unwrap() == "\n" {
return Ok(list);
}
loop {
list.push(self.lexer.expect_next()?);
let next = self.lexer.peek();
if next.is_none() || next.unwrap() != "," {
if next.is_none() || next.as_ref().unwrap() != "," {
return Ok(list);
}
self.lexer.expect_next()?;
}
}
@ -81,7 +106,7 @@ impl<'input> AssemblyParser<'input> {
// If we're already at the end of the line, then it's an empty list, so return
let next = self.lexer.peek();
if next.is_none() {
if next.is_none() || next.as_ref().unwrap() == "\n" {
return Ok(list);
}
@ -160,9 +185,9 @@ pub struct AssemblyLexer<'input> {
}
impl<'input> AssemblyLexer<'input> {
pub fn new(lineno: usize, input: &'input str) -> Self {
pub fn new(input: &'input str) -> Self {
Self {
lineno,
lineno: 1,
chars: input.chars().peekable(),
peeked: None,
}
@ -172,6 +197,11 @@ impl<'input> AssemblyLexer<'input> {
self.lineno
}
/// Returns the lexer's current line number after first skipping whitespace.
///
/// Takes `&mut self` because it calls `eat_whitespace`, which advances the
/// underlying character stream before `self.lineno` is read.
///
/// NOTE(review): the visible `eat_whitespace` increments `lineno` when it
/// stops on a pending '\n' without consuming that character, so calling this
/// and then another whitespace-skipping method may count the same newline
/// twice -- confirm against the full lexer.
pub fn get_next_lineno(&mut self) -> usize {
self.eat_whitespace();
self.lineno
}
pub fn get_next(&mut self) -> Option<String> {
if self.peeked.is_some() {
let result = std::mem::replace(&mut self.peeked, None);
@ -214,20 +244,53 @@ impl<'input> AssemblyLexer<'input> {
}
pub fn expect_end(&mut self) -> Result<(), Error> {
if let Some(token) = self.get_next() {
Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token)))
} else {
let token = self.get_next();
if token.is_none() || token.as_ref().unwrap() == "\n" {
Ok(())
} else {
Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token)))
}
}
fn eat_whitespace(&mut self) {
while self.chars.next_if(|ch| is_whitespace(*ch)).is_some() { }
}
}
while let Some(ch) = self.chars.peek() {
if *ch == '|' {
self.read_until('\n')
} else if *ch == '/' {
self.chars.next();
if self.chars.next_if(|ch| *ch == '*').is_some() {
loop {
self.read_until('*');
self.chars.next();
if self.chars.next_if(|ch| *ch == '/').is_some() {
break;
}
}
} else {
fn is_whitespace(ch: char) -> bool {
ch == ' ' || ch == '\n' || ch == '\t'
}
} else if *ch == ' ' || *ch == '\t' || *ch == '\r' {
self.chars.next();
} else {
if *ch == '\n' {
self.lineno += 1;
}
break;
}
}
}
/// Advances the character stream until the next character is `test` or the
/// input is exhausted.
///
/// The matching character itself is left unconsumed. Every '\n' skipped on
/// the way bumps `self.lineno`; a '\n' that is itself the `test` character is
/// not counted here because it is never consumed.
fn read_until(&mut self, test: char) {
    loop {
        match self.chars.peek().copied() {
            // Stop at end of input or right before the requested character.
            None => return,
            Some(current) if current == test => return,
            Some(current) => {
                if current == '\n' {
                    self.lineno += 1;
                }
                self.chars.next();
            }
        }
    }
}
}
fn is_word(ch: char) -> bool {