From 1ccee036c42e263769c50900a9f99910d2d2c5de Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 2 Mar 2019 14:19:54 -0500
Subject: [PATCH] Switches complete logic behind CAS to wave conversion to
 parsing tape files.

---
 Storage/Tape/Formats/CAS.cpp | 182 ++++++++++++++++++++++++++---------
 Storage/Tape/Formats/CAS.hpp |   6 +-
 2 files changed, 139 insertions(+), 49 deletions(-)

diff --git a/Storage/Tape/Formats/CAS.cpp b/Storage/Tape/Formats/CAS.cpp
index ef9e129f3..19c233a87 100644
--- a/Storage/Tape/Formats/CAS.cpp
+++ b/Storage/Tape/Formats/CAS.cpp
@@ -13,66 +13,156 @@
 
 using namespace Storage::Tape;
 
+/*
+	CAS files are a raw byte capture of tape content, with all solid tones transmuted to
+	the placeholder 1F A6 DE BA CC 13 7D 74 and gaps omitted.
+
+	Since that byte stream may also occur within files, and gaps and tone lengths need to be
+	reconstructed, knowledge of the MSX tape byte format is also required. Specifically:
+
+	Each tone is followed by ten bytes that determine the file type:
+
+		ten bytes of value 0xD0 => a binary file;
+		ten bytes of value 0xD3 => a BASIC file;
+		ten bytes of value 0xEA => an ASCII file; and
+		any other pattern implies a raw data block.
+
+	Raw data blocks contain their two-byte length, then data.
+
+	Binary, BASIC and ASCII files then have a six-byte file name, followed by a short tone, followed
+	by the file contents.
+
+	ASCII files:
+
+		... are a sequence of short tone/256-byte chunk pairs. For CAS purposes, these continue until
+		you hit another 1F A6 DE BA CC 13 7D 74 sequence.
+
+	Binary files:
+
+		... begin with three 16-bit values, the starting, ending and execution addresses. Then there is
+		the correct amount of data to fill memory from the starting to the ending address, inclusive.
+
+	BASIC files:
+
+		... are in Microsoft-standard BASIC form of (two bytes link to next line), (two bytes line number), [tokens],
+		starting from address 0x8001. These files continue until a next line address of 0x0000 is found, then
+		are usually padded by 0s for a period that I haven't yet determined a pattern for. The code below treats
+		everything to the next 0x1f as padding.
+*/
+
 namespace  {
 	const uint8_t header_signature[8] = {0x1f, 0xa6, 0xde, 0xba, 0xcc, 0x13, 0x7d, 0x74};
+
+	// Ten-byte file-type markers, one of which may follow a header signature; spelt out in full to avoid a macro.
+	const uint8_t binary_signature[10] = {0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0};
+	const uint8_t basic_signature[10] = {0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3, 0xd3};
+	const uint8_t ascii_signature[10] = {0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea, 0xea};
 }
 
 CAS::CAS(const std::string &file_name) {
 	Storage::FileHolder file(file_name);
-	uint8_t lookahead[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-	// Entirely fill the lookahead and verify that its start matches the header signature.
-	get_next(file, lookahead, 10);
-	if(std::memcmp(lookahead, header_signature, sizeof(header_signature))) throw ErrorNotCAS;
+	enum class Mode {	// What the data following the next header signature is expected to be.
+		Seeking,	// A file-type header or a raw data block.
+		ASCII,		// A further 256-byte segment of an ASCII file.
+		Binary,		// The body of a binary file.
+		BASIC		// The body of a BASIC file.
+	} parsing_mode_ = Mode::Seeking;
 
-	while(!file.eof()) {
-		// Just found a header, so flush the lookahead.
-		get_next(file, lookahead, 8);
-
-		// Create a new chunk
-		chunks_.emplace_back();
-		Chunk &chunk = chunks_.back();
-
-		// Decide whether to award a long header and/or a gap.
-		bool bytes_are_equal = true;
-		for(std::size_t index = 0; index < sizeof(lookahead); index++)
-			bytes_are_equal &= (lookahead[index] == lookahead[0]);
-
-		chunk.long_header = bytes_are_equal && ((lookahead[0] == 0xd3) || (lookahead[0] == 0xd0) || (lookahead[0] == 0xea));
-		chunk.has_gap = chunk.long_header && (chunks_.size() > 1);
-
-		// Keep going until another header arrives or the file ends. Headers require the magic byte sequence,
-		// and also must be eight-byte aligned within the file.
-		while(	!file.eof() &&
-				(std::memcmp(lookahead, header_signature, sizeof(header_signature)) || ((file.tell()-10)&7))) {
-			chunk.data.push_back(lookahead[0]);
-			get_next(file, lookahead, 1);
+	while(true) {
+		// Each iteration handles one header: churn through the file until the next header signature is found.
+		const auto header_position = file.tell();
+		const auto signature = file.read(8);
+		if(signature.size() != 8) break;
+		if(std::memcmp(signature.data(), header_signature, 8)) {
+			// No match; if a later byte is 0x1f then a header may begin there, so resume the search from it.
+			for(size_t c = 1; c < 8; ++c) {
+				if(signature[c] == 0x1f) {
+					file.seek(header_position + long(c), SEEK_SET);
+					break;
+				}
+			}
+			continue;
 		}
 
-		// If the file ended, flush the lookahead. The final thing in it will be a 0xff from the read that
-		// triggered the eof, so don't include that.
-		if(file.eof()) {
-			for(std::size_t index = 0; index < sizeof(lookahead) - 1; index++)
-				chunk.data.push_back(lookahead[index]);
+		// A header has definitely been found. Require from here at least ten further bytes,
+		// which are compared against the three file-type signatures.
+		const auto type = file.read(10);
+		if(type.size() != 10) break;
+
+		const bool is_binary	= !std::memcmp(type.data(), binary_signature, type.size());
+		const bool is_basic		= !std::memcmp(type.data(), basic_signature, type.size());
+		const bool is_ascii		= !std::memcmp(type.data(), ascii_signature, type.size());
+
+		switch(parsing_mode_) {
+			case Mode::Seeking: {
+				if(is_ascii || is_binary || is_basic) {
+					file.seek(header_position + 8, SEEK_SET);
+					chunks_.emplace_back(!chunks_.empty(), true, file.read(10 + 6));
+
+					if(is_ascii)	parsing_mode_ = Mode::ASCII;
+					if(is_binary)	parsing_mode_ = Mode::Binary;
+					if(is_basic)	parsing_mode_ = Mode::BASIC;
+				} else {
+					// Raw data appears now. Grab its length and keep going.
+					file.seek(header_position + 8, SEEK_SET);
+					const uint16_t length = file.get16le();
+
+					file.seek(header_position, SEEK_SET);
+					chunks_.emplace_back(false, false, file.read(size_t(length) + 2 + 8));
+				}
+			} break;
+
+			case Mode::ASCII:
+				// Keep reading ASCII in 256-byte segments until a non-ASCII chunk arrives.
+				if(is_binary || is_basic || is_ascii) {
+					file.seek(header_position, SEEK_SET);
+					parsing_mode_ = Mode::Seeking;
+				} else {
+					file.seek(header_position + 8, SEEK_SET);
+					chunks_.emplace_back(false, false, file.read(256));
+				}
+			break;
+
+			case Mode::Binary: {
+				// Get the start and end addresses in order to figure out how much data
+				// is here.
+				file.seek(header_position + 8, SEEK_SET);
+				const uint16_t start_address = file.get16le();
+				const uint16_t end_address = file.get16le();
+
+				file.seek(header_position + 8, SEEK_SET);
+				const auto length = (end_address >= start_address) ? (end_address - start_address + 1) : 0;	// Malformed (end < start) => no data, rather than a negative length.
+				chunks_.emplace_back(false, false, file.read(size_t(length) + 6));
+
+				parsing_mode_ = Mode::Seeking;
+			} break;
+
+			case Mode::BASIC: {
+				// Horror of horrors, this will mean actually following the BASIC
+				// linked list of line contents.
+				file.seek(header_position + 8, SEEK_SET);
+				uint16_t address = 0x8001;	// the BASIC start address.
+				while(true) {
+					const uint16_t next_line_address = file.get16le();
+					if(!next_line_address || next_line_address <= address || file.eof()) break;	// Also stop on a link that fails to advance, which would otherwise seek backwards.
+					file.seek(next_line_address - address - 2, SEEK_CUR);
+					address = next_line_address;
+				}
+
+				// Retain also any padding that follows the BASIC; stop at the next 0x1f or, failing that, at the end of the file.
+				while(file.get8() != 0x1f && !file.eof());
+				const auto length = (file.tell() - (file.eof() ? 0 : 1)) - (header_position + 8);	// Exclude the 0x1f itself, if one was found.
+
+				// Create the chunk and return to regular parsing.
+				file.seek(header_position + 8, SEEK_SET);
+				chunks_.emplace_back(false, false, file.read(size_t(length)));
+				parsing_mode_ = Mode::Seeking;
+			} break;
 		}
 	}
 }
 
-/*!
-	Treating @c buffer as a sliding lookahead, shifts it @c quantity elements to the left and
-	populates the new empty area to the right from @c file.
-*/
-void CAS::get_next(Storage::FileHolder &file, uint8_t (&buffer)[10], std::size_t quantity) {
-	assert(quantity <= sizeof(buffer));
-
-	if(quantity < sizeof(buffer))
-		std::memmove(buffer, &buffer[quantity], sizeof(buffer) - quantity);
-
-	while(quantity--) {
-		buffer[sizeof(buffer) - 1 - quantity] = file.get8();
-	}
-}
-
 bool CAS::is_at_end() {
 	return phase_ == Phase::EndOfFile;
 }
diff --git a/Storage/Tape/Formats/CAS.hpp b/Storage/Tape/Formats/CAS.hpp
index a20bc8a2b..e47ad3ab9 100644
--- a/Storage/Tape/Formats/CAS.hpp
+++ b/Storage/Tape/Formats/CAS.hpp
@@ -42,9 +42,6 @@ class CAS: public Tape {
 		void virtual_reset();
 		Pulse virtual_get_next_pulse();
 
-		// Helper for populating the file list, below.
-		void get_next(Storage::FileHolder &file, uint8_t (&buffer)[10], std::size_t quantity);
-
 		// Storage for the array of data blobs to transcribe into audio;
 		// each chunk is preceded by a header which may be long, and is optionally
 		// also preceded by a gap.
@@ -52,6 +49,9 @@ class CAS: public Tape {
 			bool has_gap;
 			bool long_header;
 			std::vector<std::uint8_t> data;
+
+			Chunk(bool has_gap, bool long_header, std::vector<std::uint8_t> data) :	// By-value sink: rvalue arguments are moved into place rather than copied.
+				has_gap(has_gap), long_header(long_header), data(std::move(data)) {}
 		};
 		std::vector<Chunk> chunks_;