mirror of
https://github.com/transistorfet/moa.git
synced 2024-11-21 19:30:52 +00:00
Modified the parser to work on the entire input
Previously it was going line by line, but that makes it hard to properly parse multiline comments, so I modified it to include line terminators in the token stream. I also added parsing of /* */ block comments and of | comments that run to the next \n. There is still a problem with line numbers in the post-parsing phases, but they seem correct in the parser/lexer stage. It's still not able to parse the syscall.s file from Computie, but it's mostly just issues with named constants preceded by a "#" or "-" character. As for the encoding stage, it has a problem with a move instruction that uses a label.
This commit is contained in:
parent
586a16509f
commit
62a484d317
@ -11,12 +11,17 @@ fn main() {
|
||||
let filename = env::args().nth(1).unwrap();
|
||||
let text = fs::read_to_string(filename).unwrap();
|
||||
|
||||
let words = assembler.assemble_words(&text).unwrap();
|
||||
|
||||
println!("Output:");
|
||||
for word in words.iter() {
|
||||
print!("{:04x} ", word);
|
||||
}
|
||||
println!("");
|
||||
match assembler.assemble_words(&text) {
|
||||
Ok(words) => {
|
||||
println!("Output:");
|
||||
for word in words.iter() {
|
||||
print!("{:04x} ", word);
|
||||
}
|
||||
println!("");
|
||||
},
|
||||
Err(err) => {
|
||||
println!("{}", err.msg);
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,7 @@ pub enum Disallow {
|
||||
NoARegImmediateOrPC = 0x0702,
|
||||
NoRegsPrePostOrImmediate = 0x011B,
|
||||
NoImmediateOrPC = 0x0700,
|
||||
OnlyAReg = 0x07FD,
|
||||
}
|
||||
|
||||
impl Disallow {
|
||||
@ -113,18 +114,8 @@ impl M68kAssembler {
|
||||
}
|
||||
|
||||
fn parse(&mut self, text: &str) -> Result<Vec<(usize, AssemblyLine)>, Error> {
|
||||
let mut output = vec![];
|
||||
let iter = text.split_terminator("\n");
|
||||
|
||||
for (lineno, line_text) in iter.enumerate() {
|
||||
let mut parser = AssemblyParser::new(lineno, line_text);
|
||||
let parsed_line = parser.parse_line()?;
|
||||
if let Some(line) = parsed_line {
|
||||
output.push((lineno, line));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(output)
|
||||
let mut parser = AssemblyParser::new(text);
|
||||
parser.parse()
|
||||
}
|
||||
|
||||
fn apply_relocations(&mut self) -> Result<(), Error> {
|
||||
@ -212,12 +203,18 @@ impl M68kAssembler {
|
||||
let operation_size = get_size_from_mneumonic(mneumonic).ok_or_else(|| Error::new(&format!("error at line {}: expected a size specifier (b/w/l)", lineno)));
|
||||
match &mneumonic[..mneumonic.len() - 1] {
|
||||
|
||||
"addi" | "addai" => {
|
||||
"addi" => {
|
||||
self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
|
||||
},
|
||||
"add" | "adda" => {
|
||||
"addai" => {
|
||||
self.convert_common_immediate_instruction(lineno, 0x0600, args, operation_size?, Disallow::OnlyAReg)?;
|
||||
},
|
||||
"add" => {
|
||||
self.convert_common_dreg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?;
|
||||
},
|
||||
"adda" => {
|
||||
self.convert_common_areg_instruction(lineno, 0xD000, args, operation_size?, Disallow::None)?;
|
||||
},
|
||||
"andi" => {
|
||||
if !self.check_convert_flags_instruction(lineno, 0x23C, 0x27C, args)? {
|
||||
self.convert_common_immediate_instruction(lineno, 0x0200, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
|
||||
@ -291,12 +288,18 @@ impl M68kAssembler {
|
||||
self.convert_common_shift_instruction(lineno, mneumonic, 0xE010, args, operation_size?)?;
|
||||
},
|
||||
|
||||
"subi" | "subai" => {
|
||||
"subi" => {
|
||||
self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::NoARegImmediateOrPC)?;
|
||||
},
|
||||
"sub" | "suba" => {
|
||||
"subai" => {
|
||||
self.convert_common_immediate_instruction(lineno, 0x0400, args, operation_size?, Disallow::OnlyAReg)?;
|
||||
},
|
||||
"sub" => {
|
||||
self.convert_common_dreg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?;
|
||||
},
|
||||
"suba" => {
|
||||
self.convert_common_areg_instruction(lineno, 0x9000, args, operation_size?, Disallow::None)?;
|
||||
},
|
||||
|
||||
// TODO complete remaining instructions
|
||||
_ => return Err(Error::new(&format!("unrecognized instruction at line {}: {:?}", lineno, mneumonic))),
|
||||
@ -315,8 +318,16 @@ impl M68kAssembler {
|
||||
}
|
||||
|
||||
fn convert_common_dreg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> {
|
||||
self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoAReg)
|
||||
}
|
||||
|
||||
fn convert_common_areg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow) -> Result<(), Error> {
|
||||
self.convert_common_reg_instruction(lineno, opcode, args, operation_size, disallow, Disallow::NoDReg)
|
||||
}
|
||||
|
||||
fn convert_common_reg_instruction(&mut self, lineno: usize, opcode: u16, args: &[AssemblyOperand], operation_size: Size, disallow: Disallow, disallow_reg: Disallow) -> Result<(), Error> {
|
||||
expect_args(lineno, args, 2)?;
|
||||
let (direction, reg, operand) = convert_reg_and_other(lineno, args, Disallow::NoAReg)?;
|
||||
let (direction, reg, operand) = convert_reg_and_other(lineno, args, disallow_reg)?;
|
||||
let (effective_address, additional_words) = convert_target(lineno, operand, operation_size, disallow)?;
|
||||
self.output.push(opcode | encode_size(operation_size) | direction | (reg << 9) | effective_address);
|
||||
self.output.extend(additional_words);
|
||||
@ -419,12 +430,13 @@ fn convert_target(lineno: usize, operand: &AssemblyOperand, size: Size, disallow
|
||||
if name.starts_with("a") {
|
||||
let reg = expect_reg_num(lineno, name)?;
|
||||
return Ok(((0b100 << 3) | reg, vec![]));
|
||||
} else if name == "sp" {
|
||||
return Ok((0b100111, vec![]));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(Error::new(&format!("error at line {}: pre-decrement operator can only be used with a single address register", lineno)))
|
||||
},
|
||||
// TODO complete remaining types
|
||||
_ => Err(Error::new(&format!("not implemented: {:?}", operand))),
|
||||
}
|
||||
}
|
||||
@ -474,7 +486,19 @@ fn convert_indirect(lineno: usize, args: &[AssemblyOperand], disallow: Disallow)
|
||||
Ok(((0b101 << 3) | reg, convert_immediate(lineno, *offset, Size::Word)?))
|
||||
}
|
||||
},
|
||||
// TODO add the index register mode
|
||||
&[AssemblyOperand::Immediate(offset), AssemblyOperand::Register(name), AssemblyOperand::Register(index)] => {
|
||||
let index_reg = expect_reg_num(lineno, index)?;
|
||||
let da_select = if index.starts_with("a") { 1 << 15 } else { 0 };
|
||||
if name == "pc" {
|
||||
disallow.check(lineno, Disallow::NoPCRelativeIndex)?;
|
||||
Ok((0b111011, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)]))
|
||||
} else {
|
||||
disallow.check(lineno, Disallow::NoIndirectIndexReg)?;
|
||||
let reg = expect_address_reg_num(lineno, name)?;
|
||||
Ok(((0b110 << 3) | reg, vec![da_select | (index_reg << 12) | ((*offset as u16) & 0xff)]))
|
||||
}
|
||||
},
|
||||
// TODO add the MC68020 address options
|
||||
_ => {
|
||||
Err(Error::new(&format!("error at line {}: expected valid indirect addressing mode, but found {:?}", lineno, args)))
|
||||
}
|
||||
@ -498,14 +522,14 @@ fn convert_reg_and_other<'a>(lineno: usize, args: &'a [AssemblyOperand], disallo
|
||||
fn convert_immediate(lineno: usize, value: usize, size: Size) -> Result<Vec<u16>, Error> {
|
||||
match size {
|
||||
Size::Byte => {
|
||||
if value < u8::MAX as usize {
|
||||
if value <= u8::MAX as usize {
|
||||
Ok(vec![value as u16])
|
||||
} else {
|
||||
Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u8::MAX, value)))
|
||||
}
|
||||
},
|
||||
Size::Word => {
|
||||
if value < u16::MAX as usize {
|
||||
if value <= u16::MAX as usize {
|
||||
Ok(vec![value as u16])
|
||||
} else {
|
||||
Err(Error::new(&format!("error at line {}: immediate number is out of range; must be less than {}, but number is {:?}", lineno, u16::MAX, value)))
|
||||
|
@ -28,16 +28,32 @@ pub struct AssemblyParser<'input> {
|
||||
}
|
||||
|
||||
impl<'input> AssemblyParser<'input> {
|
||||
pub fn new(lineno: usize, input: &'input str) -> Self {
|
||||
pub fn new(input: &'input str) -> Self {
|
||||
Self {
|
||||
lexer: AssemblyLexer::new(lineno, input),
|
||||
lexer: AssemblyLexer::new(input),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_line(&mut self) -> Result<Option<AssemblyLine>, Error> {
|
||||
let token = match self.lexer.get_next() {
|
||||
Some(token) => token,
|
||||
None => return Ok(None),
|
||||
pub fn parse(&mut self) -> Result<Vec<(usize, AssemblyLine)>, Error> {
|
||||
let mut output = vec![];
|
||||
loop {
|
||||
let lineno = self.lexer.get_next_lineno();
|
||||
if let Some(line) = self.parse_line()? {
|
||||
output.push((lineno, line));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn parse_line(&mut self) -> Result<Option<AssemblyLine>, Error> {
|
||||
let token = loop {
|
||||
match self.lexer.get_next() {
|
||||
Some(token) if token == "\n" => { },
|
||||
Some(token) => { break token; }
|
||||
None => { return Ok(None); },
|
||||
}
|
||||
};
|
||||
|
||||
let result = match token.as_str() {
|
||||
@ -67,12 +83,21 @@ impl<'input> AssemblyParser<'input> {
|
||||
|
||||
fn parse_list_of_words(&mut self) -> Result<Vec<String>, Error> {
|
||||
let mut list = vec![];
|
||||
|
||||
// If we're already at the end of the line, then it's an empty list, so return
|
||||
let next = self.lexer.peek();
|
||||
if next.is_none() || next.as_ref().unwrap() == "\n" {
|
||||
return Ok(list);
|
||||
}
|
||||
|
||||
loop {
|
||||
list.push(self.lexer.expect_next()?);
|
||||
|
||||
let next = self.lexer.peek();
|
||||
if next.is_none() || next.unwrap() != "," {
|
||||
if next.is_none() || next.as_ref().unwrap() != "," {
|
||||
return Ok(list);
|
||||
}
|
||||
self.lexer.expect_next()?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -81,7 +106,7 @@ impl<'input> AssemblyParser<'input> {
|
||||
|
||||
// If we're already at the end of the line, then it's an empty list, so return
|
||||
let next = self.lexer.peek();
|
||||
if next.is_none() {
|
||||
if next.is_none() || next.as_ref().unwrap() == "\n" {
|
||||
return Ok(list);
|
||||
}
|
||||
|
||||
@ -160,9 +185,9 @@ pub struct AssemblyLexer<'input> {
|
||||
}
|
||||
|
||||
impl<'input> AssemblyLexer<'input> {
|
||||
pub fn new(lineno: usize, input: &'input str) -> Self {
|
||||
pub fn new(input: &'input str) -> Self {
|
||||
Self {
|
||||
lineno,
|
||||
lineno: 1,
|
||||
chars: input.chars().peekable(),
|
||||
peeked: None,
|
||||
}
|
||||
@ -172,6 +197,11 @@ impl<'input> AssemblyLexer<'input> {
|
||||
self.lineno
|
||||
}
|
||||
|
||||
pub fn get_next_lineno(&mut self) -> usize {
|
||||
self.eat_whitespace();
|
||||
self.lineno
|
||||
}
|
||||
|
||||
pub fn get_next(&mut self) -> Option<String> {
|
||||
if self.peeked.is_some() {
|
||||
let result = std::mem::replace(&mut self.peeked, None);
|
||||
@ -214,20 +244,53 @@ impl<'input> AssemblyLexer<'input> {
|
||||
}
|
||||
|
||||
pub fn expect_end(&mut self) -> Result<(), Error> {
|
||||
if let Some(token) = self.get_next() {
|
||||
Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token)))
|
||||
} else {
|
||||
let token = self.get_next();
|
||||
if token.is_none() || token.as_ref().unwrap() == "\n" {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::new(&format!("expected end of line at {}: found {:?}", self.lineno, token)))
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_whitespace(&mut self) {
|
||||
while self.chars.next_if(|ch| is_whitespace(*ch)).is_some() { }
|
||||
}
|
||||
}
|
||||
while let Some(ch) = self.chars.peek() {
|
||||
if *ch == '|' {
|
||||
self.read_until('\n')
|
||||
} else if *ch == '/' {
|
||||
self.chars.next();
|
||||
if self.chars.next_if(|ch| *ch == '*').is_some() {
|
||||
loop {
|
||||
self.read_until('*');
|
||||
self.chars.next();
|
||||
if self.chars.next_if(|ch| *ch == '/').is_some() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
||||
fn is_whitespace(ch: char) -> bool {
|
||||
ch == ' ' || ch == '\n' || ch == '\t'
|
||||
}
|
||||
} else if *ch == ' ' || *ch == '\t' || *ch == '\r' {
|
||||
self.chars.next();
|
||||
} else {
|
||||
if *ch == '\n' {
|
||||
self.lineno += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_until(&mut self, test: char) {
|
||||
while let Some(ch) = self.chars.peek() {
|
||||
if *ch == test {
|
||||
return;
|
||||
}
|
||||
if *ch == '\n' {
|
||||
self.lineno += 1;
|
||||
}
|
||||
self.chars.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_word(ch: char) -> bool {
|
||||
|
Loading…
Reference in New Issue
Block a user