From 6d769c9e89802d588e54af76b2b7c9a2b916c149 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 21 May 2024 21:49:30 -0400 Subject: [PATCH] Use string similarity as a program differentiator. --- Analyser/Static/Acorn/Disk.cpp | 9 +++ Analyser/Static/Acorn/StaticAnalyser.cpp | 16 +++-- Numeric/StringSimilarity.hpp | 65 +++++++++++++++++++ .../Clock Signal.xcodeproj/project.pbxproj | 2 + 4 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 Numeric/StringSimilarity.hpp diff --git a/Analyser/Static/Acorn/Disk.cpp b/Analyser/Static/Acorn/Disk.cpp index e0da241e6..e2c392d53 100644 --- a/Analyser/Static/Acorn/Disk.cpp +++ b/Analyser/Static/Acorn/Disk.cpp @@ -201,5 +201,14 @@ std::unique_ptr Analyser::Static::Acorn::GetADFSCatalogue(const std:: catalogue->files.push_back(std::move(new_file)); } + // Include the directory title. + const uint8_t *title; + if(catalogue->has_large_sectors) { + title = &root_directory[0x7dd]; + } else { + title = &root_directory[0x4d9]; + } + catalogue->name = std::string(reinterpret_cast(title), 19); + return catalogue; } diff --git a/Analyser/Static/Acorn/StaticAnalyser.cpp b/Analyser/Static/Acorn/StaticAnalyser.cpp index 846503221..612cdd083 100644 --- a/Analyser/Static/Acorn/StaticAnalyser.cpp +++ b/Analyser/Static/Acorn/StaticAnalyser.cpp @@ -12,7 +12,10 @@ #include "Tape.hpp" #include "Target.hpp" +#include "../../../Numeric/StringSimilarity.hpp" + #include +#include using namespace Analyser::Static::Acorn; @@ -148,10 +151,8 @@ Analyser::Static::TargetList Analyser::Static::Acorn::GetTargets(const Media &me targetArchimedes->media.disks = media.disks; // Also look for the best possible startup program name, if it can be discerned. + std::map> options; for(const auto &file: adfs_catalogue->files) { - // Skip files that would have been caught by shift-restart if suitable. - if(file.name == "!System" || file.name == "!Boot") continue; - // Skip non-Pling files. 
if(file.name[0] != '!') continue; @@ -167,9 +168,12 @@ Analyser::Static::TargetList Analyser::Static::Acorn::GetTargets(const Media &me } ) != file.name.end(); - if(targetArchimedes->main_program.empty() || !has_read) { - targetArchimedes->main_program = file.name; - } + const auto probability = Numeric::similarity(file.name, adfs_catalogue->name) * (has_read ? 0.5 : 1.0); + options.emplace(probability, file.name); + } + + if(!options.empty()) { + targetArchimedes->main_program = options.begin()->second; } } } diff --git a/Numeric/StringSimilarity.hpp b/Numeric/StringSimilarity.hpp new file mode 100644 index 000000000..d830bd988 --- /dev/null +++ b/Numeric/StringSimilarity.hpp @@ -0,0 +1,65 @@ +// +// StringSimilarity.hpp +// Clock Signal +// +// Created by Thomas Harte on 21/05/2024. +// Copyright © 2024 Thomas Harte. All rights reserved. +// + +#ifndef StringSimilarity_hpp +#define StringSimilarity_hpp + +#include +#include +#include + +namespace Numeric { + +/// Seeks to implement algorithm as per http://www.catalysoft.com/articles/StrikeAMatch.html +/// +/// @returns A number in the range 0.0 to 1.0 indicating the similarity between two strings; +/// 1.0 is most similar, 0.0 is least. 
/// Scores how alike two strings are using the "Strike a Match" letter-pair approach,
/// as per http://www.catalysoft.com/articles/StrikeAMatch.html
///
/// Each string is reduced to the set of its adjacent letter pairs, compared case-insensitively;
/// pairs in which either character is not alphabetic are discarded. The score is then
/// 2 * |shared pairs| / (|pairs in first| + |pairs in second|).
///
/// Declared `inline` because it is defined in a header; without that, including the header from
/// more than one translation unit would produce multiple definitions.
///
/// @param first The first string to compare.
/// @param second The second string to compare.
/// @returns A number in the range 0.0 to 1.0 indicating the similarity between the two strings;
///	1.0 is most similar, 0.0 is least. 0.0 is also returned if either string is shorter than two
///	characters, or if neither string contains any adjacent pair of letters.
inline double similarity(std::string_view first, std::string_view second) {
	if(first.size() < 2 || second.size() < 2) {
		return 0.0;
	}

	// Collects every adjacent alphabetic pair, packed as (upper(left) << 8) | upper(right).
	const auto pairs = [](std::string_view source) -> std::set<uint16_t> {
		std::set<uint16_t> result;
		for(std::size_t c = 0; c < source.size() - 1; c++) {
			// The <cctype> functions require a value representable as unsigned char;
			// passing a negative plain char is undefined behaviour.
			const auto left = static_cast<unsigned char>(source[c]);
			const auto right = static_cast<unsigned char>(source[c+1]);

			if(isalpha(left) && isalpha(right)) {
				result.insert(static_cast<uint16_t>(
					(toupper(left) << 8) |
					toupper(right)
				));
			}
		}
		return result;
	};

	const auto first_pairs = pairs(first);
	const auto second_pairs = pairs(second);

	// With no letter pairs on either side there is nothing to compare; return 0.0 rather
	// than evaluating 0.0 / 0.0, which would hand a NaN back to the caller.
	const std::size_t total_pairs = first_pairs.size() + second_pairs.size();
	if(!total_pairs) {
		return 0.0;
	}

	// Both sets iterate in ascending order, so a single merge-style pass counts the intersection.
	std::size_t numerator = 0;
	auto first_it = first_pairs.begin();
	auto second_it = second_pairs.begin();
	while(first_it != first_pairs.end() && second_it != second_pairs.end()) {
		if(*first_it == *second_it) {
			++numerator;
			++first_it;
			++second_it;
		} else if(*first_it < *second_it) {
			++first_it;
		} else {
			++second_it;
		}
	}

	return static_cast<double>(numerator * 2) / static_cast<double>(total_pairs);
}
lastKnownFileType = sourcecode.cpp.h; path = StringSimilarity.hpp; sourceTree = ""; }; 4BDA00D922E60EE300AC3CD0 /* ROMRequester.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = ROMRequester.xib; sourceTree = ""; }; 4BDA00DE22E644AF00AC3CD0 /* CSROMReceiverView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CSROMReceiverView.h; sourceTree = ""; }; 4BDA00DF22E644AF00AC3CD0 /* CSROMReceiverView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = CSROMReceiverView.m; sourceTree = ""; }; @@ -3528,6 +3529,7 @@ 4BB5B995281B1D3E00522DA9 /* RegisterSizes.hpp */, 4BFEA2F12682A90200EBF94C /* Sizes.hpp */, 4281572E2AA0334300E16AA1 /* Carry.hpp */, + 4BD9713A2BFD7E7100C907AA /* StringSimilarity.hpp */, ); name = Numeric; path = ../../Numeric;