From 62a484d317ed872c3e226b21120d4e8a41e9906e Mon Sep 17 00:00:00 2001 From: transistor Date: Sun, 15 May 2022 20:44:36 -0700 Subject: [PATCH] Modified the parser to work on the entire input Previously it was going line by line, but that makes it hard to properly parse multiline comments, so I modified it to include line terminators in the token stream. I also added parsing of /* */ and | \n comment types. There is still a problem with line numbers in the post-parsing phases, but they seem correct in the parser/lexer stage. It's still not able to parse the syscall.s file from Computie, but it's mostly just issues with named constants preceded by a "#" or "-" character. As for the encoding stage, it has a problem with a move instruction that uses a label. --- src/bin/m68kas.rs | 19 +++++--- src/cpus/m68k/assembler.rs | 66 +++++++++++++++++-------- src/parser.rs | 99 +++++++++++++++++++++++++++++++------- 3 files changed, 138 insertions(+), 46 deletions(-) diff --git a/src/bin/m68kas.rs b/src/bin/m68kas.rs index 8ee47e7..b6c3431 100644 --- a/src/bin/m68kas.rs +++ b/src/bin/m68kas.rs @@ -11,12 +11,17 @@ fn main() { let filename = env::args().nth(1).unwrap(); let text = fs::read_to_string(filename).unwrap(); - let words = assembler.assemble_words(&text).unwrap(); - - println!("Output:"); - for word in words.iter() { - print!("{:04x} ", word); - } - println!(""); + match assembler.assemble_words(&text) { + Ok(words) => { + println!("Output:"); + for word in words.iter() { + print!("{:04x} ", word); + } + println!(""); + }, + Err(err) => { + println!("{}", err.msg); + }, + }; } diff --git a/src/cpus/m68k/assembler.rs b/src/cpus/m68k/assembler.rs index 555073c..9aa596f 100644 --- a/src/cpus/m68k/assembler.rs +++ b/src/cpus/m68k/assembler.rs @@ -31,6 +31,7 @@ pub enum Disallow { NoARegImmediateOrPC = 0x0702, NoRegsPrePostOrImmediate = 0x011B, NoImmediateOrPC = 0x0700, + OnlyAReg = 0x07FD, } impl Disallow { @@ -113,18 +114,8 @@ impl M68kAssembler { } fn parse(&mut self, 
text: &str) -> Result, Error> { - let mut output = vec![]; - let iter = text.split_terminator("\n"); - - for (lineno, line_text) in iter.enumerate() { - let mut parser = AssemblyParser::new(lineno, line_text); - let parsed_line = parser.parse_line()?; - if let Some(line) = parsed_line { - output.push((lineno, line)); - } - } - - Ok(output) + let mut parser = AssemblyParser::new(text); + parser.parse() } fn apply_relocations(&mut self) -> Result<(), Error> { @@ -212,12 +203,18 @@ impl M68kAssembler { let operation_size = get_size_from_mneumonic(mneumonic).ok_or_else(|| Error::new(&format!("error at line {}: expected a size specifier (b/w/l)", lineno))); match &mneumonic[..mneumonic.len() - 1] { - "addi" | "addai" => { + "addi" => { self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::NoARegImmediateOrPC)?; }, - "add" | "adda" => { + "addai" => { + self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::OnlyAReg)?; + }, + "add" => { self.convert_common_dreg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?; }, + "adda" => { + self.convert_common_areg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?; + }, "andi" => { if !self.check_convert_flags_instruction(lineno, 0x23C, 0x27C, args)? 
{ self.convert_common_immediate_instruction(lineno, 0x0200, args, operation_size?, Disallow::NoARegImmediateOrPC)?; @@ -291,12 +288,18 @@ impl M68kAssembler { self.convert_common_shift_instruction(lineno, mneumonic, 0xE010, args, operation_size?)?; }, - "subi" | "subai" => { + "subi" => { self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::NoARegImmediateOrPC)?; }, - "sub" | "suba" => { + "subai" => { + self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::OnlyAReg)?; + }, + "sub" => { self.convert_common_dreg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?; }, + "suba" => { + self.convert_common_areg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?; + }, // TODO complete remaining instructions _ => return Err(Error::new(&format!("unrecognized instruction at line {}: {:?}", lineno, mneumonic))), @@ -315,8 +318,16 @@ impl M68kAssembler { } fn convert_common_dreg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> { + self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoAReg) + } + + fn convert_common_areg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> { + self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoDReg) + } + + fn convert_common_reg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow, disallow_reg: Disallow) -> Result<(), Error> { expect_args(lineno, args, 2)?; - let (direction, reg, operand) = convert_reg_and_other(lineno, args, Disallow::NoAReg)?; + let (direction, reg, operand) = convert_reg_and_other(lineno, args, disallow_reg)?; let (effective_address, additional_words) = convert_target(lineno, operand, 
operation_size, disallow)?; self.output.push(opcode | encode_size(operation_size) | direction | (reg << 9) | effective_address); self.output.extend(additional_words); @@ -419,12 +430,13 @@ fn convert_target(lineno: usize, operand: &AssemblyOperand, size: Size, disallow if name.starts_with("a") { let reg = expect_reg_num(lineno, name)?; return Ok(((0b100 << 3) | reg, vec![])); + } else if name == "sp" { + return Ok((0b100111, vec![])); } } } Err(Error::new(&format!("error at line {}: pre-decrement operator can only be used with a single address register", lineno))) }, - // TODO complete remaining types _ => Err(Error::new(&format!("not implemented: {:?}", operand))), } } @@ -474,7 +486,19 @@ fn convert_indirect(lineno: usize, args: &[AssemblyOperand], disallow: Disallow) Ok(((0b101 << 3) | reg, convert_immediate(lineno, *offset, Size::Word)?)) } }, - // TODO add the index register mode + &[AssemblyOperand::Immediate(offset), AssemblyOperand::Register(name), AssemblyOperand::Register(index)] => { + let index_reg = expect_reg_num(lineno, index)?; + let da_select = if index.starts_with("a") { 1 << 15 } else { 0 }; + if name == "pc" { + disallow.check(lineno, Disallow::NoPCRelativeIndex)?; + Ok((0b111011, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)])) + } else { + disallow.check(lineno, Disallow::NoIndirectIndexReg)?; + let reg = expect_address_reg_num(lineno, name)?; + Ok(((0b110 << 3) | reg, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)])) + } + }, + // TODO add the MC68020 address options _ => { Err(Error::new(&format!("error at line {}: expected valid indirect addressing mode, but found {:?}", lineno, args))) } @@ -498,14 +522,14 @@ fn convert_reg_and_other<'a>(lineno: usize, args: &'a [AssemblyOperand], disallo fn convert_immediate(lineno: usize, value: usize, size: Size) -> Result, Error> { match size { Size::Byte => { - if value < u8::MAX as usize { + if value <= u8::MAX as usize { Ok(vec![value as u16]) } else { 
Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u8::MAX, value))) } }, Size::Word => { - if value < u16::MAX as usize { + if value <= u16::MAX as usize { Ok(vec![value as u16]) } else { Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u16::MAX, value))) diff --git a/src/parser.rs b/src/parser.rs index 0588278..a7a454c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -28,16 +28,32 @@ pub struct AssemblyParser<'input> { } impl<'input> AssemblyParser<'input> { - pub fn new(lineno: usize, input: &'input str) -> Self { + pub fn new(input: &'input str) -> Self { Self { - lexer: AssemblyLexer::new(lineno, input), + lexer: AssemblyLexer::new(input), } } - pub fn parse_line(&mut self) -> Result, Error> { - let token = match self.lexer.get_next() { - Some(token) => token, - None => return Ok(None), + pub fn parse(&mut self) -> Result, Error> { + let mut output = vec![]; + loop { + let lineno = self.lexer.get_next_lineno(); + if let Some(line) = self.parse_line()? 
{ + output.push((lineno, line)); + } else { + break; + } + } + Ok(output) + } + + fn parse_line(&mut self) -> Result, Error> { + let token = loop { + match self.lexer.get_next() { + Some(token) if token == "\n" => { }, + Some(token) => { break token; } + None => { return Ok(None); }, + } }; let result = match token.as_str() { @@ -67,12 +83,21 @@ impl<'input> AssemblyParser<'input> { fn parse_list_of_words(&mut self) -> Result, Error> { let mut list = vec![]; + + // If we're already at the end of the line, then it's an empty list, so return + let next = self.lexer.peek(); + if next.is_none() || next.as_ref().unwrap() == "\n" { + return Ok(list); + } + loop { list.push(self.lexer.expect_next()?); + let next = self.lexer.peek(); - if next.is_none() || next.unwrap() != "," { + if next.is_none() || next.as_ref().unwrap() != "," { return Ok(list); } + self.lexer.expect_next()?; } } @@ -81,7 +106,7 @@ impl<'input> AssemblyParser<'input> { // If we're already at the end of the line, then it's an empty list, so return let next = self.lexer.peek(); - if next.is_none() { + if next.is_none() || next.as_ref().unwrap() == "\n" { return Ok(list); } @@ -160,9 +185,9 @@ pub struct AssemblyLexer<'input> { } impl<'input> AssemblyLexer<'input> { - pub fn new(lineno: usize, input: &'input str) -> Self { + pub fn new(input: &'input str) -> Self { Self { - lineno, + lineno: 1, chars: input.chars().peekable(), peeked: None, } @@ -172,6 +197,11 @@ impl<'input> AssemblyLexer<'input> { self.lineno } + pub fn get_next_lineno(&mut self) -> usize { + self.eat_whitespace(); + self.lineno + } + pub fn get_next(&mut self) -> Option { if self.peeked.is_some() { let result = std::mem::replace(&mut self.peeked, None); @@ -214,20 +244,53 @@ impl<'input> AssemblyLexer<'input> { } pub fn expect_end(&mut self) -> Result<(), Error> { - if let Some(token) = self.get_next() { - Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token))) - } else { + let token = self.get_next(); + 
if token.is_none() || token.as_ref().unwrap() == "\n" { Ok(()) + } else { + Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token))) } } fn eat_whitespace(&mut self) { - while self.chars.next_if(|ch| is_whitespace(*ch)).is_some() { } - } -} + while let Some(ch) = self.chars.peek() { + if *ch == '|' { + self.read_until('\n') + } else if *ch == '/' { + self.chars.next(); + if self.chars.next_if(|ch| *ch == '*').is_some() { + loop { + self.read_until('*'); + self.chars.next(); + if self.chars.next_if(|ch| *ch == '/').is_some() { + break; + } + } + } else { -fn is_whitespace(ch: char) -> bool { - ch == ' ' || ch == '\n' || ch == '\t' + } + } else if *ch == ' ' || *ch == '\t' || *ch == '\r' { + self.chars.next(); + } else { + if *ch == '\n' { + self.lineno += 1; + } + break; + } + } + } + + fn read_until(&mut self, test: char) { + while let Some(ch) = self.chars.peek() { + if *ch == test { + return; + } + if *ch == '\n' { + self.lineno += 1; + } + self.chars.next(); + } + } } fn is_word(ch: char) -> bool {