From 8da7806ee9cf2d6ce4016ad17efbccb52862b134 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 15 Jul 2020 22:27:04 -0400 Subject: [PATCH] Liberalises segment parser not necessarily to require a standard epilogue. It seems that real disks don't always have them; I guess the boot ROM doesn't require one. --- .../Disk/Encodings/AppleGCR/SegmentParser.cpp | 391 +++++++++--------- 1 file changed, 195 insertions(+), 196 deletions(-) diff --git a/Storage/Disk/Encodings/AppleGCR/SegmentParser.cpp b/Storage/Disk/Encodings/AppleGCR/SegmentParser.cpp index 9a91c4155..12a99e76e 100644 --- a/Storage/Disk/Encodings/AppleGCR/SegmentParser.cpp +++ b/Storage/Disk/Encodings/AppleGCR/SegmentParser.cpp @@ -12,6 +12,8 @@ #include +using namespace Storage::Encodings::AppleGCR; + namespace { const uint8_t six_and_two_unmapping[] = { @@ -55,9 +57,183 @@ uint8_t unmap_five_and_three(uint8_t source) { return five_and_three_unmapping[source - 0xab]; } +std::unique_ptr decode_macintosh_sector(const std::array &header, const std::unique_ptr &original) { + // There must be at least 704 bytes to decode from. + if(original->data.size() < 704) return nullptr; + + // Attempt a six-and-two unmapping of the header. + std::array decoded_header; + for(size_t c = 0; c < decoded_header.size(); ++c) { + decoded_header[c] = unmap_six_and_two(header[c]); + if(decoded_header[c] == 0xff) { + return nullptr; + } + } + + // Allocate a sector. + auto sector = std::make_unique(); + sector->data.resize(704); + + // Test the checksum. + if(decoded_header[4] != (decoded_header[0] ^ decoded_header[1] ^ decoded_header[2] ^ decoded_header[3])) + sector->has_header_checksum_error = true; + + // Decode the header. 
+ sector->address.track = uint8_t(decoded_header[0] | ((decoded_header[2]&0x1f) << 6)); + sector->address.sector = decoded_header[1]; + sector->address.format = decoded_header[3]; + sector->address.is_side_two = decoded_header[2] & 0x20; + + // Reverse the GCR encoding of the sector contents to get back to 6-bit data. + for(size_t index = 0; index < sector->data.size(); ++index) { + sector->data[index] = unmap_six_and_two(original->data[index]); + if(sector->data[index] == 0xff) { + return nullptr; + } + } + + // The first byte in the sector is a repeat of the sector number; test it + // for correctness. + if(sector->data[0] != sector->address.sector) { + return nullptr; + } + + // Cf. the corresponding section of Encoder.cpp for logic below. + int checksum[3] = {0, 0, 0}; + for(size_t c = 0; c < 175; ++c) { + // Calculate the rolling checcksum in order to decode the bytes. + checksum[0] = (checksum[0] << 1) | (checksum[0] >> 7); + + // All offsets are +1 below, to skip the initial sector number duplicate. + const uint8_t top_bits = sector->data[1 + c*4]; + + // Decode first byte. + sector->data[0 + c * 3] = uint8_t((sector->data[2 + c*4] + ((top_bits & 0x30) << 2)) ^ checksum[0]); + checksum[2] += sector->data[0 + c * 3] + (checksum[0] >> 8); + + // Decode second byte; + sector->data[1 + c * 3] = uint8_t((sector->data[3 + c*4] + ((top_bits & 0x0c) << 4)) ^ checksum[2]); + checksum[1] += sector->data[1 + c * 3] + (checksum[2] >> 8); + + // Decode third byte, if there is one. + if(c != 174) { + sector->data[2 + c * 3] = uint8_t((sector->data[4 + c*4] + ((top_bits & 0x03) << 6)) ^ checksum[1]); + checksum[0] += sector->data[2 + c * 3] + (checksum[1] >> 8); + } + + // Reset carries. + checksum[0] &= 0xff; + checksum[1] &= 0xff; + checksum[2] &= 0xff; + } + + // Test the checksum. 
+ if( + checksum[0] != uint8_t(sector->data[703] + ((sector->data[700] & 0x03) << 6)) || + checksum[1] != uint8_t(sector->data[702] + ((sector->data[700] & 0x0c) << 4)) || + checksum[2] != uint8_t(sector->data[701] + ((sector->data[700] & 0x30) << 2)) + ) sector->has_data_checksum_error = true; + + // Report success. + sector->data.resize(524); + sector->encoding = Sector::Encoding::Macintosh; + return sector; } -using namespace Storage::Encodings::AppleGCR; +std::unique_ptr decode_appleii_sector(const std::array &header, const std::unique_ptr &original, bool is_five_and_three) { + // There must be at least 411 bytes to decode a five-and-three sector from; + // there must be only 343 if this is a six-and-two sector. + const size_t data_size = is_five_and_three ? 411 : 343; + if(original->data.size() < data_size) return nullptr; + + // Check for apparent four and four encoding. + uint_fast8_t header_mask = 0xff; + for(auto c : header) header_mask &= c; + header_mask &= 0xaa; + if(header_mask != 0xaa) return nullptr; + + // Allocate a sector and fill the header fields. + auto sector = std::make_unique(); + sector->data.resize(data_size); + + sector->address.volume = ((header[0] << 1) | 1) & header[1]; + sector->address.track = ((header[2] << 1) | 1) & header[3]; + sector->address.sector = ((header[4] << 1) | 1) & header[5]; + + // Check the header checksum. + // The 0x11 is reverse engineered from the game 'Alien Rain' and is present even on the boot sector, + // so probably isn't copy protection? + uint_fast8_t checksum = (((header[6] << 1) | 1) & header[7]) ^ (is_five_and_three ? 0x11 : 0x00); + if(checksum != (sector->address.volume^sector->address.track^sector->address.sector)) return nullptr; + + // Unmap the sector contents. + for(size_t index = 0; index < data_size; ++index) { + sector->data[index] = is_five_and_three ? 
+ unmap_five_and_three(original->data[index]) : unmap_six_and_two(original->data[index]); + if(sector->data[index] == 0xff) { + return nullptr; + } + } + + // Undo the XOR step on sector contents and check that checksum. + for(std::size_t c = 1; c < sector->data.size(); ++c) { + sector->data[c] ^= sector->data[c-1]; + } + if(sector->data.back()) return nullptr; + + // Having checked the checksum, remove it. + sector->data.resize(sector->data.size() - 1); + + if(is_five_and_three) { + // TODO: the below is almost certainly incorrect; Beneath Apple DOS partly documents + // the process, enough to give the basic outline below of how five source bytes are + // mapped to eight five-bit quantities, but isn't clear on the order those bytes will + // end up in on disk. + + std::vector buffer(256); + for(size_t c = 0; c < 0x33; ++c) { + const uint8_t *const base = &sector->data[0x032 - c]; + + buffer[(c * 5) + 0] = uint8_t((base[0x000] << 3) | (base[0x100] >> 2)); + buffer[(c * 5) + 1] = uint8_t((base[0x033] << 3) | (base[0x133] >> 2)); + buffer[(c * 5) + 2] = uint8_t((base[0x066] << 3) | (base[0x166] >> 2)); + buffer[(c * 5) + 3] = uint8_t((base[0x099] << 3) | ((base[0x100] & 2) << 1) | (base[0x133] & 2) | ((base[0x166] & 2) >> 1)); + buffer[(c * 5) + 4] = uint8_t((base[0x0cc] << 3) | ((base[0x100] & 1) << 2) | ((base[0x133] & 1) << 1) | (base[0x166] & 1)); + } + buffer[255] = uint8_t((sector->data[0x0ff] << 3) | (sector->data[0x199] >> 2)); + + sector->data = std::move(buffer); + sector->encoding = Sector::Encoding::FiveAndThree; + } else { + // Undo the 6 and 2 mapping. 
+ const uint8_t bit_reverse[] = {0, 2, 1, 3}; + #define unmap(byte, nibble, shift) \ + sector->data[86 + byte] = uint8_t(\ + (sector->data[86 + byte] << 2) | bit_reverse[(sector->data[nibble] >> shift)&3]); + + for(std::size_t c = 0; c < 84; ++c) { + unmap(c, c, 0); + unmap(c+86, c, 2); + unmap(c+172, c, 4); + } + + unmap(84, 84, 0); + unmap(170, 84, 2); + unmap(85, 85, 0); + unmap(171, 85, 2); + + #undef unmap + + // Throw away the collection of two-bit chunks from the start of the sector. + sector->data.erase(sector->data.begin(), sector->data.end() - 256); + + sector->encoding = Sector::Encoding::SixAndTwo; + } + + // Return successfully. + return sector; +} + +} std::map Storage::Encodings::AppleGCR::sectors_from_segment(const Disk::PCMSegment &segment) { std::map result; @@ -109,7 +285,7 @@ std::map Storage::Encodings::AppleGCR::sectors_from_segment // If this is the start of a data section, and at least // one header has been witnessed, start a sector. - if(scanner[2] == data_prologue[2]) { + if(scanner[2] == data_prologue[2] || is_five_and_three) { new_sector = std::make_unique(); new_sector->data.reserve(710); } else { @@ -120,23 +296,12 @@ std::map Storage::Encodings::AppleGCR::sectors_from_segment } } else { if(new_sector) { - // Check whether the value just read is a legal GCR byte, in six-and-two - // encoding (which is a strict superset of five-and-three). + // Check whether the value just read is a legal GCR byte, for this sector; + // if not, or if const bool is_invalid = is_five_and_three ? (unmap_five_and_three(value) == 0xff) : (unmap_six_and_two(value) == 0xff); - if(is_invalid) { - // The second byte of the standard epilogue is illegal, so this still may - // be a valid sector. If the final byte was the first byte of an epilogue, - // chop it off and see whether the sector is otherwise intelligible. - - if(new_sector->data.empty() || new_sector->data.back() != epilogue[0]) { - // No sector found; reset scanning procedure. 
- new_sector.reset(); - pointer = scanning_sentinel; - continue; - } - - // Chop off the last byte. - new_sector->data.resize(new_sector->data.size() - 1); + if(is_invalid || new_sector->data.size() >= 704) { + // The second byte of the standard epilogue is 'illegal', as is the first byte of + // all prologues. So either a whole sector has been captured up to now, or it hasn't. // Move the sector elsewhere for processing; there's definitely no way to proceed with // the prospective sector if it doesn't parse. @@ -144,185 +309,19 @@ std::map Storage::Encodings::AppleGCR::sectors_from_segment new_sector.reset(); pointer = scanning_sentinel; - // Check for valid decoding options. - switch(sector->data.size()) { - default: // This is not a decodeable sector. - break; - - case 411: // Potentially this is an Apple II five-and-three sector. - case 343: { // Potentially this is an Apple II six-and-two sector. - // Check for apparent four and four encoding. - uint_fast8_t header_mask = 0xff; - for(auto c : header) header_mask &= c; - header_mask &= 0xaa; - if(header_mask != 0xaa) continue; - - sector->address.volume = ((header[0] << 1) | 1) & header[1]; - sector->address.track = ((header[2] << 1) | 1) & header[3]; - sector->address.sector = ((header[4] << 1) | 1) & header[5]; - - // Check the header checksum. - // The 0x11 is reverse engineered from the game 'Alien Rain' and is present even on the boot sector, - // so probably isn't copy protection? - uint_fast8_t checksum = (((header[6] << 1) | 1) & header[7]) ^ (is_five_and_three ? 0x11 : 0x00); - if(checksum != (sector->address.volume^sector->address.track^sector->address.sector)) continue; - - // Unmap the sector contents. - bool out_of_bounds = false; - for(auto &c : sector->data) { - c = is_five_and_three ? unmap_five_and_three(c) : unmap_six_and_two(c); - if(c == 0xff) { - out_of_bounds = true; - break; - } - } - if(out_of_bounds) continue; - - // Undo the XOR step on sector contents and check that checksum. 
- for(std::size_t c = 1; c < sector->data.size(); ++c) { - sector->data[c] ^= sector->data[c-1]; - } - if(sector->data.back()) continue; - - // Having checked the checksum, remove it. - sector->data.resize(sector->data.size() - 1); - - if(is_five_and_three) { - // TODO: the above is almost certainly incorrect; Beneath Apple DOS partly documents - // the process, enough to give the basic outline below of how five source bytes are - // mapped to eight five-bit quantities, but isn't clear on the order those bytes will - // end up in on disk. - - std::vector buffer(256); - for(size_t c = 0; c < 0x33; ++c) { - const uint8_t *const base = &sector->data[0x032 - c]; - - buffer[(c * 5) + 0] = uint8_t((base[0x000] << 3) | (base[0x100] >> 2)); - buffer[(c * 5) + 1] = uint8_t((base[0x033] << 3) | (base[0x133] >> 2)); - buffer[(c * 5) + 2] = uint8_t((base[0x066] << 3) | (base[0x166] >> 2)); - buffer[(c * 5) + 3] = uint8_t((base[0x099] << 3) | ((base[0x100] & 2) << 1) | (base[0x133] & 2) | ((base[0x166] & 2) >> 1)); - buffer[(c * 5) + 4] = uint8_t((base[0x0cc] << 3) | ((base[0x100] & 1) << 2) | ((base[0x133] & 1) << 1) | (base[0x166] & 1)); - } - buffer[255] = uint8_t((sector->data[0x0ff] << 3) | (sector->data[0x199] >> 2)); - - sector->data = std::move(buffer); - sector->encoding = Sector::Encoding::FiveAndThree; - } else { - // Undo the 6 and 2 mapping. - const uint8_t bit_reverse[] = {0, 2, 1, 3}; - #define unmap(byte, nibble, shift) \ - sector->data[86 + byte] = uint8_t(\ - (sector->data[86 + byte] << 2) | bit_reverse[(sector->data[nibble] >> shift)&3]); - - for(std::size_t c = 0; c < 84; ++c) { - unmap(c, c, 0); - unmap(c+86, c, 2); - unmap(c+172, c, 4); - } - - unmap(84, 84, 0); - unmap(170, 84, 2); - unmap(85, 85, 0); - unmap(171, 85, 2); - - #undef unmap - - // Throw away the collection of two-bit chunks from the start of the sector. 
- sector->data.erase(sector->data.begin(), sector->data.end() - 256); - - sector->encoding = Sector::Encoding::SixAndTwo; - } - // Add this sector to the map. - result.insert(std::make_pair(sector_location, std::move(*sector))); - } break; - - case 704: { // Potentially this is a Macintosh sector. - // Attempt a six-and-two unmapping of the header. - std::array decoded_header; - bool out_of_bounds = false; - for(size_t c = 0; c < decoded_header.size(); ++c) { - decoded_header[c] = unmap_six_and_two(header[c]); - if(decoded_header[c] == 0xff) { - out_of_bounds = true; - break; - } - } - if(out_of_bounds) { - continue; - } - - // Test the checksum. - if(decoded_header[4] != (decoded_header[0] ^ decoded_header[1] ^ decoded_header[2] ^ decoded_header[3])) - sector->has_header_checksum_error = true; - - // Decode the header. - sector->address.track = uint8_t(decoded_header[0] | ((decoded_header[2]&0x1f) << 6)); - sector->address.sector = decoded_header[1]; - sector->address.format = decoded_header[3]; - sector->address.is_side_two = decoded_header[2] & 0x20; - - // Reverse the GCR encoding of the sector contents to get back to 6-bit data. - for(auto &c: sector->data) { - c = unmap_six_and_two(c); - if(c == 0xff) { - out_of_bounds = true; - break; - } - } - if(out_of_bounds) { - continue; - } - - // The first byte in the sector is a repeat of the sector number; test it - // for correctness. - if(sector->data[0] != sector->address.sector) { - continue; - } - - // Cf. the corresponding section of Encoder.cpp for logic below. - int checksum[3] = {0, 0, 0}; - for(size_t c = 0; c < 175; ++c) { - // Calculate the rolling checcksum in order to decode the bytes. - checksum[0] = (checksum[0] << 1) | (checksum[0] >> 7); - - // All offsets are +1 below, to skip the initial sector number duplicate. - const uint8_t top_bits = sector->data[1 + c*4]; - - // Decode first byte. 
- sector->data[0 + c * 3] = uint8_t((sector->data[2 + c*4] + ((top_bits & 0x30) << 2)) ^ checksum[0]); - checksum[2] += sector->data[0 + c * 3] + (checksum[0] >> 8); - - // Decode second byte; - sector->data[1 + c * 3] = uint8_t((sector->data[3 + c*4] + ((top_bits & 0x0c) << 4)) ^ checksum[2]); - checksum[1] += sector->data[1 + c * 3] + (checksum[2] >> 8); - - // Decode third byte, if there is one. - if(c != 174) { - sector->data[2 + c * 3] = uint8_t((sector->data[4 + c*4] + ((top_bits & 0x03) << 6)) ^ checksum[1]); - checksum[0] += sector->data[2 + c * 3] + (checksum[1] >> 8); - } - - // Reset carries. - checksum[0] &= 0xff; - checksum[1] &= 0xff; - checksum[2] &= 0xff; - } - - // Test the checksum. - if( - checksum[0] != uint8_t(sector->data[703] + ((sector->data[700] & 0x03) << 6)) || - checksum[1] != uint8_t(sector->data[702] + ((sector->data[700] & 0x0c) << 4)) || - checksum[2] != uint8_t(sector->data[701] + ((sector->data[700] & 0x30) << 2)) - ) sector->has_data_checksum_error = true; - - // Chop to size, and that's that. - sector->data.resize(524); - - // Add this sector to the map. - sector->encoding = Sector::Encoding::Macintosh; - result.insert(std::make_pair(sector_location, std::move(*sector))); - } break; + // Potentially this is a Macintosh sector. + auto macintosh_sector = decode_macintosh_sector(header, sector); + if(macintosh_sector) { + result.insert(std::make_pair(sector_location, std::move(*macintosh_sector))); + continue; } + + // Apple II then? + auto appleii_sector = decode_appleii_sector(header, sector, is_five_and_three); + if(appleii_sector) { + result.insert(std::make_pair(sector_location, std::move(*appleii_sector))); + } + } else { new_sector->data.push_back(value); }