1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-09-27 18:55:48 +00:00

Use string similarity as a program differentiator.

This commit is contained in:
Thomas Harte 2024-05-21 21:49:30 -04:00
parent 0c683c2c81
commit 6d769c9e89
4 changed files with 86 additions and 6 deletions

View File

@ -201,5 +201,14 @@ std::unique_ptr<Catalogue> Analyser::Static::Acorn::GetADFSCatalogue(const std::
catalogue->files.push_back(std::move(new_file));
}
// Include the directory title.
const uint8_t *title;
if(catalogue->has_large_sectors) {
title = &root_directory[0x7dd];
} else {
title = &root_directory[0x4d9];
}
catalogue->name = std::string(reinterpret_cast<const char *>(title), 19);
return catalogue;
}

View File

@ -12,7 +12,10 @@
#include "Tape.hpp"
#include "Target.hpp"
#include "../../../Numeric/StringSimilarity.hpp"
#include <algorithm>
#include <map>
using namespace Analyser::Static::Acorn;
@ -148,10 +151,8 @@ Analyser::Static::TargetList Analyser::Static::Acorn::GetTargets(const Media &me
targetArchimedes->media.disks = media.disks;
// Also look for the best possible startup program name, if it can be discerned.
std::map<double, std::string, std::greater<double>> options;
for(const auto &file: adfs_catalogue->files) {
// Skip files that would have been caught by shift-restart if suitable.
if(file.name == "!System" || file.name == "!Boot") continue;
// Skip non-Pling files.
if(file.name[0] != '!') continue;
@ -167,9 +168,12 @@ Analyser::Static::TargetList Analyser::Static::Acorn::GetTargets(const Media &me
}
) != file.name.end();
if(targetArchimedes->main_program.empty() || !has_read) {
targetArchimedes->main_program = file.name;
}
const auto probability = Numeric::similarity(file.name, adfs_catalogue->name) * (has_read ? 0.5 : 1.0);
options.emplace(probability, file.name);
}
if(!options.empty()) {
targetArchimedes->main_program = options.begin()->second;
}
}
}

View File

@ -0,0 +1,65 @@
//
// StringSimilarity.hpp
// Clock Signal
//
// Created by Thomas Harte on 21/05/2024.
// Copyright © 2024 Thomas Harte. All rights reserved.
//
#ifndef StringSimilarity_hpp
#define StringSimilarity_hpp
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <set>
#include <string>
#include <string_view>
namespace Numeric {

/// Seeks to implement algorithm as per http://www.catalysoft.com/articles/StrikeAMatch.html
///
/// Each string is reduced to the set of its adjacent letter pairs, compared case-insensitively;
/// any pair that includes a non-alphabetic character is ignored. The score is twice the number of
/// pairs common to both sets divided by the total number of pairs in the two sets.
///
/// @param first The first string to compare.
/// @param second The second string to compare.
/// @returns A number in the range 0.0 to 1.0 indicating the similarity between two strings;
/// 1.0 is most similar, 0.0 is least. 0.0 is also returned if either string is shorter than
/// two characters or if neither string contains any letter pair.
inline double similarity(std::string_view first, std::string_view second) {
	if(first.size() < 2 || second.size() < 2) {
		return 0.0;
	}

	// Builds the ordered set of upper-cased letter pairs in @c source, each packed as (first << 8) | second.
	// Relies on the size check above: source.size() - 1 cannot wrap.
	const auto pairs = [](std::string_view source) -> std::set<uint16_t> {
		std::set<uint16_t> result;
		for(std::size_t c = 0; c < source.size() - 1; c++) {
			// The <cctype> functions have undefined behaviour for negative arguments,
			// so route each character through unsigned char first.
			const auto left = static_cast<unsigned char>(source[c]);
			const auto right = static_cast<unsigned char>(source[c+1]);

			if(std::isalpha(left) && std::isalpha(right)) {
				result.insert(static_cast<uint16_t>(
					(std::toupper(left) << 8) |
					std::toupper(right)
				));
			}
		}
		return result;
	};

	const auto first_pairs = pairs(first);
	const auto second_pairs = pairs(second);

	// With no pairs on either side the quotient below would be 0/0, i.e. NaN; report 'no similarity' instead
	// so that the result is always a well-ordered number.
	const std::size_t total_pairs = first_pairs.size() + second_pairs.size();
	if(!total_pairs) {
		return 0.0;
	}

	// Both sets iterate in ascending order, so count the intersection with a single merge-style pass.
	std::size_t numerator = 0;
	auto first_it = first_pairs.begin();
	auto second_it = second_pairs.begin();
	while(first_it != first_pairs.end() && second_it != second_pairs.end()) {
		if(*first_it == *second_it) {
			++numerator;
			++first_it;
			++second_it;
		} else if(*first_it < *second_it) {
			++first_it;
		} else {
			++second_it;
		}
	}

	return static_cast<double>(numerator * 2) / static_cast<double>(total_pairs);
}

}
#endif /* StringSimilarity_hpp */

View File

@ -2268,6 +2268,7 @@
4BD9137D1F311BC5009BCF85 /* i8255.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = i8255.hpp; sourceTree = "<group>"; };
4BD91D762401C2B8007BDC91 /* PatrikRakTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PatrikRakTests.swift; sourceTree = "<group>"; };
4BD971382BFC3D9C00C907AA /* ArchimedesStaticAnalyserTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ArchimedesStaticAnalyserTests.mm; sourceTree = "<group>"; };
4BD9713A2BFD7E7100C907AA /* StringSimilarity.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = StringSimilarity.hpp; sourceTree = "<group>"; };
4BDA00D922E60EE300AC3CD0 /* ROMRequester.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = ROMRequester.xib; sourceTree = "<group>"; };
4BDA00DE22E644AF00AC3CD0 /* CSROMReceiverView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CSROMReceiverView.h; sourceTree = "<group>"; };
4BDA00DF22E644AF00AC3CD0 /* CSROMReceiverView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = CSROMReceiverView.m; sourceTree = "<group>"; };
@ -3528,6 +3529,7 @@
4BB5B995281B1D3E00522DA9 /* RegisterSizes.hpp */,
4BFEA2F12682A90200EBF94C /* Sizes.hpp */,
4281572E2AA0334300E16AA1 /* Carry.hpp */,
4BD9713A2BFD7E7100C907AA /* StringSimilarity.hpp */,
);
name = Numeric;
path = ../../Numeric;