Modified the parser to work on the entire input

Previously it was going line by line, but that makes it hard to properly parse multiline comments, so I modified it to include line terminators in the token stream. I also added parsing of the /* */ and | \n comment types. There is still a problem with line numbers in the post-parsing phases, but they seem correct in the parser/lexer stage. It's still not able to parse the syscall.s file from Computie, but that's mostly just issues with named constants preceded by a "#" or "-" character. As for the encoding stage, it has a problem with a move instruction that uses a label.
2022-05-15 20:44:36 -07:00 · 2022-05-15 20:44:36 -07:00 · 62a484d317
parent 586a16509f
commit 62a484d317
3 changed files with 138 additions and 46 deletions
--- a/src/bin/m68kas.rs
+++ b/src/bin/m68kas.rs
@ -11,12 +11,17 @@ fn main() {
    let filename = env::args().nth(1).unwrap();
    let text = fs::read_to_string(filename).unwrap();

-    let words = assembler.assemble_words(&text).unwrap();
-
-    println!("Output:");
-    for word in words.iter() {
-        print!("{:04x} ", word);
-    }
-    println!("");
+    match assembler.assemble_words(&text) {
+        Ok(words) => {
+            println!("Output:");
+            for word in words.iter() {
+                print!("{:04x} ", word);
+            }
+            println!("");
+        },
+        Err(err) => {
+            println!("{}", err.msg);
+        },
+    };
 }
 
--- a/src/cpus/m68k/assembler.rs
+++ b/src/cpus/m68k/assembler.rs
@ -31,6 +31,7 @@ pub enum Disallow {
    NoARegImmediateOrPC         = 0x0702,
    NoRegsPrePostOrImmediate    = 0x011B,
    NoImmediateOrPC             = 0x0700,
+    OnlyAReg                    = 0x07FD,
 }

 impl Disallow {
@ -113,18 +114,8 @@ impl M68kAssembler {
    }

    fn parse(&mut self, text: &str) -> Result<Vec<(usize, AssemblyLine)>, Error> {
-        let mut output = vec![];
-        let iter = text.split_terminator("\n");
-
-        for (lineno, line_text) in iter.enumerate() {
-            let mut parser = AssemblyParser::new(lineno, line_text);
-            let parsed_line = parser.parse_line()?;
-            if let Some(line) = parsed_line {
-                output.push((lineno, line));
-            }
-        }
-
-        Ok(output)
+        let mut parser = AssemblyParser::new(text);
+        parser.parse()
    }

    fn apply_relocations(&mut self) -> Result<(), Error> {
@ -212,12 +203,18 @@ impl M68kAssembler {
        let operation_size = get_size_from_mneumonic(mneumonic).ok_or_else(|| Error::new(&format!("error at line {}: expected a size specifier (b/w/l)", lineno)));
        match &mneumonic[..mneumonic.len() - 1] {

-            "addi" | "addai" => {
+            "addi" => {
                self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
            },
-            "add" | "adda" => {
+            "addai" => {
+                self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::OnlyAReg)?;
+            },
+            "add" => {
                self.convert_common_dreg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?;
            },
+            "adda" => {
+                self.convert_common_areg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?;
+            },
            "andi" => {
                if !self.check_convert_flags_instruction(lineno, 0x23C, 0x27C, args)? {
                    self.convert_common_immediate_instruction(lineno, 0x0200, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
@ -291,12 +288,18 @@ impl M68kAssembler {
                self.convert_common_shift_instruction(lineno, mneumonic, 0xE010, args, operation_size?)?;
            },

-            "subi" | "subai" => {
+            "subi" => {
                self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
            },
-            "sub" | "suba" => {
+            "subai" => {
+                self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::OnlyAReg)?;
+            },
+            "sub" => {
                self.convert_common_dreg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?;
            },
+            "suba" => {
+                self.convert_common_areg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?;
+            },

            // TODO complete remaining instructions
            _ => return Err(Error::new(&format!("unrecognized instruction at line {}: {:?}", lineno, mneumonic))),
@ -315,8 +318,16 @@ impl M68kAssembler {
    }

    fn convert_common_dreg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> {
+        self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoAReg)
+    }
+
+    fn convert_common_areg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> {
+        self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoDReg)
+    }
+
+    fn convert_common_reg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow, disallow_reg: Disallow) -> Result<(), Error> {
        expect_args(lineno, args, 2)?;
-        let (direction, reg, operand) = convert_reg_and_other(lineno, args, Disallow::NoAReg)?;
+        let (direction, reg, operand) = convert_reg_and_other(lineno, args, disallow_reg)?;
        let (effective_address, additional_words) = convert_target(lineno, operand, operation_size, disallow)?;
        self.output.push(opcode | encode_size(operation_size) | direction | (reg << 9) | effective_address);
        self.output.extend(additional_words);
@ -419,12 +430,13 @@ fn convert_target(lineno: usize, operand: &AssemblyOperand, size: Size, disallow
                    if name.starts_with("a") {
                        let reg = expect_reg_num(lineno, name)?;
                        return Ok(((0b100 << 3) | reg, vec![]));
+                    } else if name == "sp" {
+                        return Ok((0b100111, vec![]));
                    }
                }
            }
            Err(Error::new(&format!("error at line {}: pre-decrement operator can only be used with a single address register", lineno)))
        },
-        // TODO complete remaining types
        _ => Err(Error::new(&format!("not implemented: {:?}", operand))),
    }
 }
@ -474,7 +486,19 @@ fn convert_indirect(lineno: usize, args: &[AssemblyOperand], disallow: Disallow)
                Ok(((0b101 << 3) | reg, convert_immediate(lineno, *offset, Size::Word)?))
            }
        },
-        // TODO add the index register mode
+        &[AssemblyOperand::Immediate(offset), AssemblyOperand::Register(name), AssemblyOperand::Register(index)] => {
+            let index_reg = expect_reg_num(lineno, index)?;
+            let da_select = if index.starts_with("a") { 1 << 15 } else { 0 };
+            if name == "pc" {
+                disallow.check(lineno, Disallow::NoPCRelativeIndex)?;
+                Ok((0b111011, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)]))
+            } else {
+                disallow.check(lineno, Disallow::NoIndirectIndexReg)?;
+                let reg = expect_address_reg_num(lineno, name)?;
+                Ok(((0b110 << 3) | reg, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)]))
+            }
+        },
+        // TODO add the MC68020 address options
        _ => {
            Err(Error::new(&format!("error at line {}: expected valid indirect addressing mode, but found {:?}", lineno, args)))
        }
@ -498,14 +522,14 @@ fn convert_reg_and_other<'a>(lineno: usize, args: &'a [AssemblyOperand], disallo
 fn convert_immediate(lineno: usize, value: usize, size: Size) -> Result<Vec<u16>, Error> {
    match size {
        Size::Byte => {
-            if value < u8::MAX as usize {
+            if value <= u8::MAX as usize {
                Ok(vec![value as u16])
            } else {
                Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u8::MAX, value)))
            }
        },
        Size::Word => {
-            if value < u16::MAX as usize {
+            if value <= u16::MAX as usize {
                Ok(vec![value as u16])
            } else {
                Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u16::MAX, value)))
--- a/src/parser.rs
+++ b/src/parser.rs
@ -28,16 +28,32 @@ pub struct AssemblyParser<'input> {
 }

 impl<'input> AssemblyParser<'input> {
-    pub fn new(lineno: usize, input: &'input str) -> Self {
+    pub fn new(input: &'input str) -> Self {
        Self {
-            lexer: AssemblyLexer::new(lineno, input),
+            lexer: AssemblyLexer::new(input),
        }
    }

-    pub fn parse_line(&mut self) -> Result<Option<AssemblyLine>, Error> {
-        let token = match self.lexer.get_next() {
-            Some(token) => token,
-            None => return Ok(None),
+    pub fn parse(&mut self) -> Result<Vec<(usize, AssemblyLine)>, Error> {
+        let mut output = vec![];
+        loop {
+            let lineno = self.lexer.get_next_lineno();
+            if let Some(line) = self.parse_line()? {
+                output.push((lineno, line));
+            } else {
+                break;
+            }
+        }
+        Ok(output)
+    }
+
+    fn parse_line(&mut self) -> Result<Option<AssemblyLine>, Error> {
+        let token = loop {
+            match self.lexer.get_next() {
+                Some(token) if token == "\n" => { },
+                Some(token) => { break token; }
+                None => { return Ok(None); },
+            }
        };

        let result = match token.as_str() {
@ -67,12 +83,21 @@ impl<'input> AssemblyParser<'input> {

    fn parse_list_of_words(&mut self) -> Result<Vec<String>, Error> {
        let mut list = vec![];
+
+        // If we're already at the end of the line, then it's an empty list, so return
+        let next = self.lexer.peek();
+        if next.is_none() || next.as_ref().unwrap() == "\n" {
+            return Ok(list);
+        }
+
        loop {
            list.push(self.lexer.expect_next()?);
+
            let next = self.lexer.peek();
-            if next.is_none() || next.unwrap() != "," {
+            if next.is_none() || next.as_ref().unwrap() != "," {
                return Ok(list);
            }
+            self.lexer.expect_next()?;
        }
    }

@ -81,7 +106,7 @@ impl<'input> AssemblyParser<'input> {

        // If we're already at the end of the line, then it's an empty list, so return
        let next = self.lexer.peek();
-        if next.is_none() {
+        if next.is_none() || next.as_ref().unwrap() == "\n" {
            return Ok(list);
        }

@ -160,9 +185,9 @@ pub struct AssemblyLexer<'input> {
 }

 impl<'input> AssemblyLexer<'input> {
-    pub fn new(lineno: usize, input: &'input str) -> Self {
+    pub fn new(input: &'input str) -> Self {
        Self {
-            lineno,
+            lineno: 1,
            chars: input.chars().peekable(),
            peeked: None,
        }
@ -172,6 +197,11 @@ impl<'input> AssemblyLexer<'input> {
        self.lineno
    }

+    pub fn get_next_lineno(&mut self) -> usize {
+        self.eat_whitespace();
+        self.lineno
+    }
+
    pub fn get_next(&mut self) -> Option<String> {
        if self.peeked.is_some() {
            let result = std::mem::replace(&mut self.peeked, None);
@ -214,20 +244,53 @@ impl<'input> AssemblyLexer<'input> {
    }

    pub fn expect_end(&mut self) -> Result<(), Error> {
-        if let Some(token) = self.get_next() {
-            Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token)))
-        } else {
+        let token = self.get_next();
+        if token.is_none() || token.as_ref().unwrap() == "\n" {
            Ok(())
+        } else {
+            Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token)))
        }
    }

    fn eat_whitespace(&mut self) {
-        while self.chars.next_if(|ch| is_whitespace(*ch)).is_some() { }
-    }
-}
+        while let Some(ch) = self.chars.peek() {
+            if *ch == '|' {
+                self.read_until('\n')
+            } else if *ch == '/' {
+                self.chars.next();
+                if self.chars.next_if(|ch| *ch == '*').is_some() {
+                    loop {
+                        self.read_until('*');
+                        self.chars.next();
+                        if self.chars.next_if(|ch| *ch == '/').is_some() {
+                            break;
+                        }
+                    }
+                } else {

-fn is_whitespace(ch: char) -> bool {
-    ch == ' ' || ch == '\n' || ch == '\t'
+                }
+            } else if *ch == ' ' || *ch == '\t' || *ch == '\r' {
+                self.chars.next();
+            } else {
+                if *ch == '\n' {
+                    self.lineno += 1;
+                }
+                break;
+            }
+        }
+    }
+
+    fn read_until(&mut self, test: char) {
+        while let Some(ch) = self.chars.peek() {
+            if *ch == test {
+                return;
+            }
+            if *ch == '\n' {
+                self.lineno += 1;
+            }
+            self.chars.next();
+        }
+    }
 }

 fn is_word(ch: char) -> bool {