From 1ccee036c42e263769c50900a9f99910d2d2c5de Mon Sep 17 00:00:00 2001
From: Thomas Harte
Date: Sat, 2 Mar 2019 14:19:54 -0500
Subject: [PATCH] Switches complete logic behind CAS to wave conversion to parsing tape files.

---
 Storage/Tape/Formats/CAS.cpp | 195 ++++++++++++++++++++++++++---------
 Storage/Tape/Formats/CAS.hpp |   7 +-
 2 files changed, 153 insertions(+), 49 deletions(-)

diff --git a/Storage/Tape/Formats/CAS.cpp b/Storage/Tape/Formats/CAS.cpp
index ef9e129f3..19c233a87 100644
--- a/Storage/Tape/Formats/CAS.cpp
+++ b/Storage/Tape/Formats/CAS.cpp
@@ -13,66 +13,169 @@
 
 using namespace Storage::Tape;
 
+/*
+	CAS files are a raw byte capture of tape content, with all solid tones transmuted to
+	the placeholder 1F A6 DE BA CC 13 7D 74 and gaps omitted.
+
+	Since that byte stream may also occur within files, and gaps and tone lengths need to be
+	reconstructed, knowledge of the MSX tape byte format is also required. Specifically:
+
+	Each tone is followed by ten bytes that determine the file type:
+
+		ten bytes of value 0xD0 => a binary file;
+		ten bytes of value 0xD3 => it's a BASIC file;
+		ten bytes of value 0xEA => it's an ASCII file; and
+		any other pattern implies a raw data block.
+
+	Raw data blocks contain their two-byte length, then data.
+
+	Binary, BASIC and ASCII files then have a six-byte file name, followed by a short tone, followed
+	by the file contents.
+
+	ASCII files:
+
+		... are a sequence of short tone/256-byte chunk pairs. For CAS purposes, these continue until
+		you hit another 1F A6 DE BA CC 13 7D 74 sequence.
+
+	Binary files:
+
+		... begin with three 16-bit values, the starting, ending and execution addresses. Then there is
+		the correct amount of data to fill memory from the starting to the ending address, inclusive.
+
+	BASIC files:
+
+		... are in Microsoft-standard BASIC form of (two bytes link to next line), (two bytes line number), [tokens],
+		starting from address 0x8001. These files continue until a next line address of 0x0000 is found, then
+		are usually padded by 0s for a period that I haven't yet determined a pattern for. The code below treats
+		everything to the next 0x1f as padding.
+*/
+
 namespace {
 	const uint8_t header_signature[8] = {0x1f, 0xa6, 0xde, 0xba, 0xcc, 0x13, 0x7d, 0x74};
+
+	#define TenX(x)	{x, x, x, x, x, x, x, x, x, x}
+	const uint8_t binary_signature[] = TenX(0xd0);
+	const uint8_t basic_signature[] = TenX(0xd3);
+	const uint8_t ascii_signature[] = TenX(0xea);
 }
 
+/*!
+	Parses the CAS file named by @c file_name, appending an entry to @c chunks_ for
+	each block of tape data discovered.
+*/
 CAS::CAS(const std::string &file_name) {
 	Storage::FileHolder file(file_name);
-	uint8_t lookahead[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-	// Entirely fill the lookahead and verify that its start matches the header signature.
-	get_next(file, lookahead, 10);
-	if(std::memcmp(lookahead, header_signature, sizeof(header_signature))) throw ErrorNotCAS;
+	enum class Mode {
+		Seeking,
+		ASCII,
+		Binary,
+		BASIC
+	} parsing_mode_ = Mode::Seeking;
 
-	while(!file.eof()) {
-		// Just found a header, so flush the lookahead.
-		get_next(file, lookahead, 8);
-
-		// Create a new chunk
-		chunks_.emplace_back();
-		Chunk &chunk = chunks_.back();
-
-		// Decide whether to award a long header and/or a gap.
-		bool bytes_are_equal = true;
-		for(std::size_t index = 0; index < sizeof(lookahead); index++)
-			bytes_are_equal &= (lookahead[index] == lookahead[0]);
-
-		chunk.long_header = bytes_are_equal && ((lookahead[0] == 0xd3) || (lookahead[0] == 0xd0) || (lookahead[0] == 0xea));
-		chunk.has_gap = chunk.long_header && (chunks_.size() > 1);
-
-		// Keep going until another header arrives or the file ends. Headers require the magic byte sequence,
-		// and also must be eight-byte aligned within the file.
-		while(	!file.eof() &&
-				(std::memcmp(lookahead, header_signature, sizeof(header_signature)) || ((file.tell()-10)&7))) {
-			chunk.data.push_back(lookahead[0]);
-			get_next(file, lookahead, 1);
+	while(true) {
+		// Churn through the file until the next header signature is found.
+		const auto header_position = file.tell();
+		const auto signature = file.read(8);
+		if(signature.size() != 8) break;
+		if(std::memcmp(signature.data(), header_signature, 8)) {
+			// Check for other 1fs in this stream, and repeat from there if any.
+			for(size_t c = 1; c < 8; ++c) {
+				if(signature[c] == 0x1f) {
+					file.seek(header_position + long(c), SEEK_SET);
+					break;
+				}
+			}
+			continue;
 		}
 
-		// If the file ended, flush the lookahead. The final thing in it will be a 0xff from the read that
-		// triggered the eof, so don't include that.
-		if(file.eof()) {
-			for(std::size_t index = 0; index < sizeof(lookahead) - 1; index++)
-				chunk.data.push_back(lookahead[index]);
+		// A header has definitely been found. Require from here at least ten further bytes, being
+		// the type; the six-byte name that follows a typed header is read along with it below.
+		const auto type = file.read(10);
+		if(type.size() != 10) break;
+
+		const bool is_binary = !std::memcmp(type.data(), binary_signature, type.size());
+		const bool is_basic = !std::memcmp(type.data(), basic_signature, type.size());
+		const bool is_ascii = !std::memcmp(type.data(), ascii_signature, type.size());
+
+		switch(parsing_mode_) {
+			case Mode::Seeking: {
+				if(is_ascii || is_binary || is_basic) {
+					file.seek(header_position + 8, SEEK_SET);
+					chunks_.emplace_back(!chunks_.empty(), true, file.read(10 + 6));
+
+					if(is_ascii) parsing_mode_ = Mode::ASCII;
+					if(is_binary) parsing_mode_ = Mode::Binary;
+					if(is_basic) parsing_mode_ = Mode::BASIC;
+				} else {
+					// Raw data appears now. Grab its length and keep going.
+					file.seek(header_position + 8, SEEK_SET);
+					const uint16_t length = file.get16le();
+
+					file.seek(header_position, SEEK_SET);
+					chunks_.emplace_back(false, false, file.read(size_t(length) + 2 + 8));
+				}
+			} break;
+
+			case Mode::ASCII:
+				// Keep reading ASCII in 256-byte segments until a non-ASCII chunk arrives.
+				if(is_binary || is_basic || is_ascii) {
+					file.seek(header_position, SEEK_SET);
+					parsing_mode_ = Mode::Seeking;
+				} else {
+					file.seek(header_position + 8, SEEK_SET);
+					chunks_.emplace_back(false, false, file.read(256));
+				}
+			break;
+
+			case Mode::Binary: {
+				// Get the start and end addresses in order to figure out how much data
+				// is here.
+				file.seek(header_position + 8, SEEK_SET);
+				const uint16_t start_address = file.get16le();
+				const uint16_t end_address = file.get16le();
+
+				file.seek(header_position + 8, SEEK_SET);
+				// An end address that precedes the start address is malformed; treat it as
+				// an empty body rather than letting the length wrap to a huge size_t.
+				const auto length = (end_address >= start_address) ? (end_address - start_address + 1) : 0;
+				chunks_.emplace_back(false, false, file.read(size_t(length) + 6));
+
+				parsing_mode_ = Mode::Seeking;
+			} break;
+
+			case Mode::BASIC: {
+				// Horror of horrors, this will mean actually following the BASIC
+				// linked list of line contents.
+				file.seek(header_position + 8, SEEK_SET);
+				uint16_t address = 0x8001;	// the BASIC start address.
+				while(true) {
+					const uint16_t next_line_address = file.get16le();
+					// Stop at the end-of-program marker, at the end of the file, or if the link
+					// doesn't move forwards, as following it could otherwise loop forever.
+					if(!next_line_address || next_line_address <= address || file.eof()) break;
+					file.seek(next_line_address - address - 2, SEEK_CUR);
+					address = next_line_address;
+				}
+
+				// Retain also any padding that follows the BASIC; stop at the end of the file rather
+				// than spinning forever if no 0x1f follows (assumes eof() is set by a failed read).
+				bool found_marker = false;
+				while(!found_marker && !file.eof()) {
+					found_marker = file.get8() == 0x1f;
+				}
+				// Exclude the 0x1f itself if one was found; it belongs to whatever comes next.
+				const auto length = (file.tell() - (found_marker ? 1 : 0)) - (header_position + 8);
+
+				// Create the chunk and return to regular parsing.
+				file.seek(header_position + 8, SEEK_SET);
+				chunks_.emplace_back(false, false, file.read(size_t(length)));
+				parsing_mode_ = Mode::Seeking;
+			} break;
 		}
 	}
 }
 
-/*!
-	Treating @c buffer as a sliding lookahead, shifts it @c quantity elements to the left and
-	populates the new empty area to the right from @c file.
-*/
-void CAS::get_next(Storage::FileHolder &file, uint8_t (&buffer)[10], std::size_t quantity) {
-	assert(quantity <= sizeof(buffer));
-
-	if(quantity < sizeof(buffer))
-		std::memmove(buffer, &buffer[quantity], sizeof(buffer) - quantity);
-
-	while(quantity--) {
-		buffer[sizeof(buffer) - 1 - quantity] = file.get8();
-	}
-}
-
 bool CAS::is_at_end() {
 	return phase_ == Phase::EndOfFile;
 }
diff --git a/Storage/Tape/Formats/CAS.hpp b/Storage/Tape/Formats/CAS.hpp
index a20bc8a2b..e47ad3ab9 100644
--- a/Storage/Tape/Formats/CAS.hpp
+++ b/Storage/Tape/Formats/CAS.hpp
@@ -42,9 +42,6 @@ class CAS: public Tape {
 		void virtual_reset();
 		Pulse virtual_get_next_pulse();
 
-		// Helper for populating the file list, below.
-		void get_next(Storage::FileHolder &file, uint8_t (&buffer)[10], std::size_t quantity);
-
 		// Storage for the array of data blobs to transcribe into audio;
 		// each chunk is preceded by a header which may be long, and is optionally
 		// also preceded by a gap.
@@ -52,6 +49,10 @@ class CAS: public Tape {
 			bool has_gap;
 			bool long_header;
 			std::vector<uint8_t> data;
+
+			// Takes @c data by value so that temporaries can be moved in rather than copied.
+			Chunk(bool has_gap, bool long_header, std::vector<uint8_t> data) :
+				has_gap(has_gap), long_header(long_header), data(std::move(data)) {}
 		};
 
 		std::vector<Chunk> chunks_;