Switches the complete logic behind CAS-to-wave conversion over to parsing tape files.

2025-08-13 00:25:26 +00:00 · 2019-03-02 14:19:54 -05:00
parent ef085e3f93
commit 1ccee036c4
2 changed files with 139 additions and 49 deletions
--- a/Storage/Tape/Formats/CAS.cpp
+++ b/Storage/Tape/Formats/CAS.cpp
@@ -13,66 +13,156 @@
 using namespace Storage::Tape;
 /*
 	CAS files are a raw byte capture of tape content, with all solid tones transmuted to
 	the placeholder 1F A6 DE BA CC 13 7D 74 and gaps omitted.
 	Since that byte stream may also occur within files, and gaps and tone lengths need to be
 	reconstructed, knowledge of the MSX tape byte format is also required. Specifically:
 	Each tone is followed by ten bytes that determine the file type:
 		ten bytes of value 0xD0 => it's a binary file;
 		ten bytes of value 0xD3 => it's a BASIC file;
 		ten bytes of value 0xEA => it's an ASCII file; and
 		any other pattern implies a raw data block.
 	Raw data blocks contain their two-byte length, then data.
 	Binary, BASIC and ASCII files then have a six-byte file name, followed by a short tone, followed
 	by the file contents.
 	ASCII files:
 		... are a sequence of short tone/256-byte chunk pairs. For CAS purposes, these continue until
 		you hit another 1F A6 DE BA CC 13 7D 74 sequence.
 	Binary files:
 		... begin with three 16-bit values, the starting, ending and execution addresses. Then there is
 		the correct amount of data to fill memory from the starting to the ending address, inclusive.
 	BASIC files:
 		... are in Microsoft-standard BASIC form of (two bytes link to next line), (two bytes line number), [tokens],
 		starting from address 0x8001. These files continue until a next line address of 0x0000 is found, then
 		are usually padded by 0s for a period that I haven't yet determined a pattern for. The code below treats
 		everything to the next 0x1f as padding.
 */
 namespace  {
 	// The eight-byte placeholder that a CAS file stores in place of each solid tone.
 	const uint8_t header_signature[8] = {0x1f, 0xa6, 0xde, 0xba, 0xcc, 0x13, 0x7d, 0x74};
 	// Expands to a braced initialiser list that repeats @c x ten times.
 	#define TenX(x) {x, x, x, x, x, x, x, x, x, x}
 	// The ten-byte file type markers that may follow a tone: ten repetitions of
 	// 0xd0 for a binary file, 0xd3 for a BASIC file and 0xea for an ASCII file.
 	const uint8_t binary_signature[] = TenX(0xd0);
 	const uint8_t basic_signature[] = TenX(0xd3);
 	const uint8_t ascii_signature[] = TenX(0xea);
 }
 CAS::CAS(const std::string &file_name) {
 	Storage::FileHolder file(file_name);
 	uint8_t lookahead[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-	// Entirely fill the lookahead and verify that its start matches the header signature.
+	enum class Mode {
-	get_next(file, lookahead, 10);
+		Seeking,
-	if(std::memcmp(lookahead, header_signature, sizeof(header_signature))) throw ErrorNotCAS;
+		ASCII,
 		Binary,
 		BASIC
 	} parsing_mode_ = Mode::Seeking;
-	while(!file.eof()) {
+	while(true) {
-		// Just found a header, so flush the lookahead.
+		// Churn through the file until the next header signature is found.
-		get_next(file, lookahead, 8);
+		const auto header_position = file.tell();
-
+		const auto signature = file.read(8);
-		// Create a new chunk
+		if(signature.size() != 8) break;
-		chunks_.emplace_back();
+		if(std::memcmp(signature.data(), header_signature, 8)) {
-		Chunk &chunk = chunks_.back();
+			// Check for other 1fs in this stream, and repeat from there if any.
-
+			for(size_t c = 1; c < 8; ++c) {
-		// Decide whether to award a long header and/or a gap.
+				if(signature[c] == 0x1f) {
-		bool bytes_are_equal = true;
+					file.seek(header_position + long(c), SEEK_SET);
-		for(std::size_t index = 0; index < sizeof(lookahead); index++)
+					break;
-			bytes_are_equal &= (lookahead[index] == lookahead[0]);
+				}
-
+			}
-		chunk.long_header = bytes_are_equal && ((lookahead[0] == 0xd3) || (lookahead[0] == 0xd0) || (lookahead[0] == 0xea));
+			continue;
 		chunk.has_gap = chunk.long_header && (chunks_.size() > 1);
 		// Keep going until another header arrives or the file ends. Headers require the magic byte sequence,
 		// and also must be eight-byte aligned within the file.
 		while(	!file.eof() &&
 				(std::memcmp(lookahead, header_signature, sizeof(header_signature)) || ((file.tell()-10)&7))) {
 			chunk.data.push_back(lookahead[0]);
 			get_next(file, lookahead, 1);
 		}
-		// If the file ended, flush the lookahead. The final thing in it will be a 0xff from the read that
+		// A header has definitely been found. Require from here at least 16 further bytes,
-		// triggered the eof, so don't include that.
+		// being the type and a name.
-		if(file.eof()) {
+		const auto type = file.read(10);
-			for(std::size_t index = 0; index < sizeof(lookahead) - 1; index++)
+		if(type.size() != 10) break;
-				chunk.data.push_back(lookahead[index]);
+
 		const bool is_binary	= !std::memcmp(type.data(), binary_signature, type.size());
 		const bool is_basic		= !std::memcmp(type.data(), basic_signature, type.size());
 		const bool is_ascii		= !std::memcmp(type.data(), ascii_signature, type.size());
 		switch(parsing_mode_) {
 			case Mode::Seeking: {
 				if(is_ascii || is_binary || is_basic) {
 					file.seek(header_position + 8, SEEK_SET);
 					chunks_.emplace_back(!chunks_.empty(), true, file.read(10 + 6));
 					if(is_ascii)	parsing_mode_ = Mode::ASCII;
 					if(is_binary)	parsing_mode_ = Mode::Binary;
 					if(is_basic)	parsing_mode_ = Mode::BASIC;
 				} else {
 					// Raw data appears now. Grab its length and keep going.
 					file.seek(header_position + 8, SEEK_SET);
 					const uint16_t length = file.get16le();
 					file.seek(header_position, SEEK_SET);
 					chunks_.emplace_back(false, false, file.read(size_t(length) + 2 + 8));
 				}
 			} break;
 			case Mode::ASCII:
 				// Keep reading ASCII in 256-byte segments until a non-ASCII chunk arrives.
 				if(is_binary || is_basic || is_ascii) {
 					file.seek(header_position, SEEK_SET);
 					parsing_mode_ = Mode::Seeking;
 				} else {
 					file.seek(header_position + 8, SEEK_SET);
 					chunks_.emplace_back(false, false, file.read(256));
 				}
 			break;
 			case Mode::Binary: {
 				// Get the start and end addresses in order to figure out how much data
 				// is here.
 				file.seek(header_position + 8, SEEK_SET);
 				const uint16_t start_address = file.get16le();
 				const uint16_t end_address = file.get16le();
 				file.seek(header_position + 8, SEEK_SET);
 				const auto length = end_address - start_address + 1;
 				chunks_.emplace_back(false, false, file.read(size_t(length) + 6));
 				parsing_mode_ = Mode::Seeking;
 			} break;
 			case Mode::BASIC: {
 				// Horror of horrors, this will mean actually following the BASIC
 				// linked list of line contents.
 				file.seek(header_position + 8, SEEK_SET);
 				uint16_t address = 0x8001;	// the BASIC start address.
 				while(true) {
 					const uint16_t next_line_address = file.get16le();
 					if(!next_line_address || file.eof()) break;
 					file.seek(next_line_address - address - 2, SEEK_CUR);
 					address = next_line_address;
 				}
 				// Retain also any padding that follows the BASIC.
 				while(file.get8() != 0x1f);
 				const auto length = (file.tell() - 1) - (header_position + 8);
 				// Create the chunk and return to regular parsing.
 				file.seek(header_position + 8, SEEK_SET);
 				chunks_.emplace_back(false, false, file.read(size_t(length)));
 				parsing_mode_ = Mode::Seeking;
 			} break;
 		}
 	}
 }
 /*!
 	Advances the sliding window held in @c buffer by @c quantity bytes: the oldest
 	@c quantity bytes are discarded, the remaining bytes are moved to the front, and
 	the vacated tail is refilled, in order, with bytes read from @c file.
 	@c quantity must not exceed the size of @c buffer.
 */
 void CAS::get_next(Storage::FileHolder &file, uint8_t (&buffer)[10], std::size_t quantity) {
 	const std::size_t capacity = sizeof(buffer);
 	assert(quantity <= capacity);
 	// Slide the bytes that are being kept down to the start of the buffer;
 	// if the whole buffer is being replaced there is nothing to keep.
 	const std::size_t kept = capacity - quantity;
 	if(quantity < capacity) {
 		std::memmove(buffer, buffer + quantity, kept);
 	}
 	// Top up the freed tail from the file, lowest index first.
 	for(std::size_t slot = kept; slot < capacity; ++slot) {
 		buffer[slot] = file.get8();
 	}
 }
 /*!
 	@returns @c true if the current phase is Phase::EndOfFile; @c false otherwise.
 */
 bool CAS::is_at_end() {
 	const bool reached_end = (phase_ == Phase::EndOfFile);
 	return reached_end;
 }
--- a/Storage/Tape/Formats/CAS.hpp
+++ b/Storage/Tape/Formats/CAS.hpp
@@ -42,9 +42,6 @@ class CAS: public Tape {
 		void virtual_reset();
 		Pulse virtual_get_next_pulse();
 		// Helper for populating the file list, below.
 		void get_next(Storage::FileHolder &file, uint8_t (&buffer)[10], std::size_t quantity);
 		// Storage for the array of data blobs to transcribe into audio;
 		// each chunk is preceded by a header which may be long, and is optionally
 		// also preceded by a gap.
@@ -52,6 +49,9 @@ class CAS: public Tape {
 			bool has_gap;
 			bool long_header;
 			std::vector<std::uint8_t> data;
 			/*!
 				Constructs a chunk, taking ownership of @c data.
 				@param has_gap Whether this chunk is preceded by a gap.
 				@param long_header Whether the header that precedes this chunk is a long one.
 				@param data The bytes of the chunk. Accepted by value so that temporaries
 					are moved into place rather than copied; applying std::move to a
 					const reference would silently fall back to a copy.
 			*/
 			Chunk(bool has_gap, bool long_header, std::vector<std::uint8_t> data) :
 				has_gap(has_gap), long_header(long_header), data(std::move(data)) {}
 		};
 		std::vector<Chunk> chunks_;