Starts on the next piece: parsers.

2025-04-14 03:37:04 +00:00 · 2021-01-16 19:54:40 -05:00 · 2021-01-16 19:54:40 -05:00 · 3dc36b704a
commit 3dc36b704a
parent 37a20e125c
4 changed files with 163 additions and 24 deletions
--- a/InstructionSets/M50740/Instruction.hpp
+++ b/InstructionSets/M50740/Instruction.hpp
@ -63,24 +63,39 @@ inline int size(AddressingMode mode) {
 enum class Operation: uint8_t {
 	Invalid,

-	ADC,	AND,	ASL,	BBC,
-	BBS,	BCC,	BCS,	BEQ,
-	BIT,	BMI,	BNE,	BPL,
-	BRA,	BRK,	BVC,	BVS,
-	CLB,	CLC,	CLD,	CLI,
-	CLT,	CLV,	CMP,	COM,
-	CPX,	CPY,	DEC,	DEX,
-	DEY,	EOR,	FST,	INC,
-	INX,	INY,	JMP,	JSR,
-	LDA,	LDM,	LDX,	LDY,
-	LSR,	NOP,	ORA,	PHA,
-	PHP,	PLA,	PLP,	ROL,
-	ROR,	RRF,	RTI,	RTS,
-	SBC,	SEB,	SEC,	SED,
-	SEI,	SET,	SLW,	STA,
-	STP,	STX,	STY,	TAX,
-	TAY,	TST,	TSX,	TXA,
-	TXS,	TYA
+	// Operations that don't access memory. NOTE(review): the stack-using operations listed here (PHA/PHP/PLA/PLP, JSR, RTS, RTI, BRK) presumably count as 'no access' because they take no data operand — confirm.
+	BBC,	BBS,	BCC,	BCS,
+	BEQ,	BMI,	BNE,	BPL,
+	BVC,	BVS,	BRA,	BRK,
+	JMP,	JSR,
+	RTI,	RTS,
+	CLC,	CLD,	CLI,	CLT,	CLV,
+	SEC,	SED,	SEI,	SET,
+	INX,	INY,	DEX,	DEY,
+	FST,	SLW,
+	NOP,
+	PHA, 	PHP, 	PLA,	PLP,
+	STP,
+	TAX,	TAY,	TSX,	TXA,
+	TXS,	TYA,
+
+	// Read operations: the operand is read and not written.
+	ADC,	SBC,
+	AND,	ORA,	EOR,	BIT,
+	CMP,	CPX,	CPY,
+	LDA,	LDX,	LDY,
+	TST,
+
+	// Write operations: the operand is written and not read.
+	LDM,
+	STA,	STX,	STY,
+
+	// Read-modify-write operations: the operand is read, altered and written back.
+	ASL,	LSR,
+	CLB,	SEB,
+	COM,
+	DEC,	INC,
+	ROL,	ROR,	RRF,
 };

 struct Instruction {
--- a/InstructionSets/M50740/Parser.hpp
+++ b/InstructionSets/M50740/Parser.hpp
@ -0,0 +1,98 @@
+//
+//  Parser.hpp
+//  Clock Signal
+//
+//  Created by Thomas Harte on 1/16/21.
+//  Copyright © 2021 Thomas Harte. All rights reserved.
+//
+
+#ifndef InstructionSets_M50740_Parser_hpp
+#define InstructionSets_M50740_Parser_hpp
+
+#include <cstdint>
+#include "Decoder.hpp"
+
+namespace InstructionSet {
+namespace M50740 {
+
+// Walks a linear run of M50740 instructions, reporting what it finds to a Target.
+//
+// Target must provide, as called below:
+//	* announce_instruction(address, instruction);
+//	* announce_overflow(address);
+//	* add_entry(address); and
+//	* add_access(address).
+template<typename Target> struct Parser {
+	// Decodes instructions from storage[start] onwards, stopping at whichever comes first of:
+	//
+	//	* reaching closing_bound;
+	//	* an instruction that doesn't fit before closing_bound, which is reported via announce_overflow; or
+	//	* an instruction after which execution never continues in sequence (RTS, RTI, BRK, JMP or BRA).
+	//
+	// storage is indexed directly by address, so it must be readable across [start, closing_bound).
+	// All addresses posted to the target are truncated to 16 bits.
+	void parse(Target &target, const uint8_t *storage, uint16_t start, uint16_t closing_bound) {
+		Decoder decoder;
+
+		while(start != closing_bound) {
+			const auto next = decoder.decode(&storage[start], closing_bound - start);
+			if(next.first <= 0) {
+				// If there weren't enough bytes left before the closing bound to complete
+				// an instruction, but implicitly there were some bytes left, announce overflow
+				// and terminate.
+				target.announce_overflow(start);
+				return;
+			}
+
+			// Pass on the instruction.
+			target.announce_instruction(start, next.second);
+
+			// Operand readers. These are deliberately lazy: each is evaluated only for an
+			// instruction known to contain the bytes it reads, so that nothing beyond the
+			// decoded instruction — and therefore beyond the closing bound — is touched.
+			const auto absolute_operand = [&] {
+				return uint16_t(storage[start + 1] | (storage[start + 2] << 8));
+			};
+			const auto special_page_operand = [&] {
+				return uint16_t(storage[start + 1] | 0x1f00);
+			};
+			// A relative displacement is the final byte of its instruction and is applied
+			// to the address of the following instruction; using the decoded size keeps this
+			// correct for branches that carry an extra operand byte ahead of the displacement.
+			const auto relative_target = [&] {
+				return uint16_t(start + next.first + int8_t(storage[start + next.first - 1]));
+			};
+
+			// Check for end of stream and potential new entry points.
+			switch(next.second.operation) {
+				// Terminating instructions.
+				case Operation::RTS: case Operation::RTI: case Operation::BRK:
+				return;
+
+				// Terminating operations, possibly with an implied additional entry point;
+				// only an absolute JMP names its destination directly in the instruction stream.
+				case Operation::JMP:
+					if(next.second.addressing_mode == AddressingMode::Absolute) {
+						target.add_entry(absolute_operand());
+					}
+				return;
+				case Operation::BRA:
+					target.add_entry(relative_target());
+				return;
+
+				// Instructions that suggest another entry point but don't terminate parsing.
+				case Operation::BBS: case Operation::BBC:
+				case Operation::BCC: case Operation::BCS:
+				case Operation::BVC: case Operation::BVS:
+				case Operation::BMI: case Operation::BPL:
+				case Operation::BNE: case Operation::BEQ:
+					target.add_entry(relative_target());
+				break;
+				case Operation::JSR:
+					// Other JSR forms don't encode a destination that is knowable from here.
+					if(next.second.addressing_mode == AddressingMode::Absolute) {
+						target.add_entry(absolute_operand());
+					} else if(next.second.addressing_mode == AddressingMode::SpecialPage) {
+						target.add_entry(special_page_operand());
+					}
+				break;
+
+				default: break;
+			}
+
+			// Provide any fixed address accesses.
+			switch(next.second.addressing_mode) {
+				case AddressingMode::Absolute:
+					target.add_access(absolute_operand());
+				break;
+				case AddressingMode::ZeroPage:
+				case AddressingMode::Bit0ZeroPage:	case AddressingMode::Bit1ZeroPage:
+				case AddressingMode::Bit2ZeroPage:	case AddressingMode::Bit3ZeroPage:
+				case AddressingMode::Bit4ZeroPage:	case AddressingMode::Bit5ZeroPage:
+				case AddressingMode::Bit6ZeroPage:	case AddressingMode::Bit7ZeroPage:
+					target.add_access(storage[start + 1]);
+				break;
+				case AddressingMode::SpecialPage:
+					target.add_access(special_page_operand());
+				break;
+				case AddressingMode::ImmediateZeroPage:
+					target.add_access(storage[start + 2]);
+				break;
+				// NOTE(review): this posts a branch destination as a data access, and only for
+				// bit 0; retained as found — confirm whether it is intended.
+				case AddressingMode::Bit0AccumulatorRelative:
+					target.add_access(uint16_t(start + 2 + int8_t(storage[start + 1])));
+				break;
+
+				default: break;
+			}
+
+			// Advance.
+			start = uint16_t(start + next.first);
+		}
+	}
+};
+
+}
+}
+
+#endif /* InstructionSets_M50740_Parser_hpp */
--- a/InstructionSets/README.md
+++ b/InstructionSets/README.md
@ -21,11 +21,11 @@ Disassemblers are likely to decode an instruction, output it, and then immediate

 Instruction executors may opt to cache decoded instructions to reduce recurrent costs, but will always be dealing with an actual instruction stream. The chance of caching means that decoded instructions should seek to be small. If helpful then a decoder might prefer to return a `std::pair` or similar of ephemeral information and stuff that it is meaningful to store.

-## Likely Interfaces
+### Likely Interfaces

 These examples assume that the processor itself doesn't hold any state that affects instruction parsing. Whether processors with such state offer more than one decoder or take state as an argument will be a question of measure and effect.  

-### Fixed-size instruction words
+#### Fixed-size instruction words

 If the instructions are a fixed size, the decoder can provide what is functionally a simple lookup, whether implemented as such or not:

@ -33,7 +33,7 @@ If the instructions are a fixed size, the decoder can provide what is functional

 For now I have preferred not to make this a simple constructor on `Instruction` because I'm reserving the option of switching to an ephemeral/permanent split in what's returned. More consideration needs to be applied here.

-### Variable-size instruction words
+#### Variable-size instruction words

 If instructions are a variable size, the decoder should maintain internal state such that it can be provided with fragments of instructions until a full decoding has occurred — this avoids an assumption that all source bytes will always be laid out linearly in memory.

@ -47,8 +47,32 @@ In this sample the returned pair provides an `int` size that is one of:

 A caller is permitted to react in any way it prefers to negative numbers; they're a hint potentially to reduce calling overhead only. A size of `0` would be taken to have the same meaning as a size of `-1`.  

-## Tying Decoders into Instruction Executors
+## Parsers

-It is assumed that disassemblers and bus-centric CPU emulators have limited generic functionality; for executors it is assumed that a processor-specific instruction fetcher and a dispatcher will be provided to couple with the decoder. 
+A parser sits one level above a decoder; it is handed:
+* a start address;
+* a closing bound; and
+* a target.

-Therefore decoders should adopt whatever interface is most natural; the expected uses information above is to provide a motivation for the scope of responsibilities and hints as to likely performance objectives only. Beyond requiring that decoded instructions be a tangible struct or class, it is not intended to be prescriptive as to form or interface.  
+It is responsible for parsing the instruction stream from the start address up to, but not beyond, the closing bound, and no further than any unconditional branch.
+
+It should post to the target:
+* any instructions fully decoded;
+* any conditional branch destinations encountered;
+* any immediately-knowable accessed addresses; and
+* if a final instruction exists but runs beyond the closing bound, notification of that fact.
+
+So a parser has the same two primary potential recipients as a decoder: disassemblers and executors.
+
+## Executors
+
+An executor is responsible for only one thing:
+* mapping from decoded instructions to objects that can perform those instructions.
+
+An executor is assumed to bundle all the things that go into instruction set execution: processor state and memory, alongside a parser.
+
+## Caching Executor
+
+The caching executor is a generic class templated on a specific executor. It will use an executor to cache the results of parsing. 
+
+Idiomatically, the objects that perform instructions will expect to receive an appropriate executor as an argument. If they require other information, such as a copy of the decoded instruction, it should be built into the classes. 
--- a/Signal.xcodeproj/project.pbxproj
+++ b/Signal.xcodeproj/project.pbxproj
@ -1940,6 +1940,7 @@
 		4BEDA40A25B2844B000C2DBD /* Decoder.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Decoder.hpp; sourceTree = "<group>"; };
 		4BEDA40B25B2844B000C2DBD /* Decoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Decoder.cpp; sourceTree = "<group>"; };
 		4BEDA41725B2845D000C2DBD /* Instruction.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Instruction.hpp; sourceTree = "<group>"; };
+		4BEDA41D25B388E4000C2DBD /* Parser.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Parser.hpp; sourceTree = "<group>"; };
 		4BEE0A6A1D72496600532C7B /* Cartridge.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Cartridge.cpp; sourceTree = "<group>"; };
 		4BEE0A6B1D72496600532C7B /* Cartridge.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Cartridge.hpp; sourceTree = "<group>"; };
 		4BEE0A6D1D72496600532C7B /* PRG.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PRG.cpp; sourceTree = "<group>"; };
@ -4393,6 +4394,7 @@
 				4BEDA40B25B2844B000C2DBD /* Decoder.cpp */,
 				4BEDA40A25B2844B000C2DBD /* Decoder.hpp */,
 				4BEDA41725B2845D000C2DBD /* Instruction.hpp */,
+				4BEDA41D25B388E4000C2DBD /* Parser.hpp */,
 			);
 			path = M50740;
 			sourceTree = "<group>";