mirror of
https://github.com/TomHarte/CLK.git
synced 2025-04-05 04:37:41 +00:00
Use string similarity as a program differentiator.
This commit is contained in:
parent
0c683c2c81
commit
6d769c9e89
@ -201,5 +201,14 @@ std::unique_ptr<Catalogue> Analyser::Static::Acorn::GetADFSCatalogue(const std::
|
||||
catalogue->files.push_back(std::move(new_file));
|
||||
}
|
||||
|
||||
// Include the directory title.
|
||||
const uint8_t *title;
|
||||
if(catalogue->has_large_sectors) {
|
||||
title = &root_directory[0x7dd];
|
||||
} else {
|
||||
title = &root_directory[0x4d9];
|
||||
}
|
||||
catalogue->name = std::string(reinterpret_cast<const char *>(title), 19);
|
||||
|
||||
return catalogue;
|
||||
}
|
||||
|
@ -12,7 +12,10 @@
|
||||
#include "Tape.hpp"
|
||||
#include "Target.hpp"
|
||||
|
||||
#include "../../../Numeric/StringSimilarity.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
using namespace Analyser::Static::Acorn;
|
||||
|
||||
@ -148,10 +151,8 @@ Analyser::Static::TargetList Analyser::Static::Acorn::GetTargets(const Media &me
|
||||
targetArchimedes->media.disks = media.disks;
|
||||
|
||||
// Also look for the best possible startup program name, if it can be discerned.
|
||||
std::map<double, std::string, std::greater<double>> options;
|
||||
for(const auto &file: adfs_catalogue->files) {
|
||||
// Skip files that would have been caught by shift-restart if suitable.
|
||||
if(file.name == "!System" || file.name == "!Boot") continue;
|
||||
|
||||
// Skip non-Pling files.
|
||||
if(file.name[0] != '!') continue;
|
||||
|
||||
@ -167,9 +168,12 @@ Analyser::Static::TargetList Analyser::Static::Acorn::GetTargets(const Media &me
|
||||
}
|
||||
) != file.name.end();
|
||||
|
||||
if(targetArchimedes->main_program.empty() || !has_read) {
|
||||
targetArchimedes->main_program = file.name;
|
||||
}
|
||||
const auto probability = Numeric::similarity(file.name, adfs_catalogue->name) * (has_read ? 0.5 : 1.0);
|
||||
options.emplace(probability, file.name);
|
||||
}
|
||||
|
||||
if(!options.empty()) {
|
||||
targetArchimedes->main_program = options.begin()->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
65
Numeric/StringSimilarity.hpp
Normal file
65
Numeric/StringSimilarity.hpp
Normal file
@ -0,0 +1,65 @@
|
||||
//
|
||||
// StringSimilarity.hpp
|
||||
// Clock Signal
|
||||
//
|
||||
// Created by Thomas Harte on 21/05/2024.
|
||||
// Copyright © 2024 Thomas Harte. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef StringSimilarity_hpp
|
||||
#define StringSimilarity_hpp
|
||||
|
||||
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <set>
#include <string>
#include <string_view>
|
||||
|
||||
namespace Numeric {
|
||||
|
||||
/// Seeks to implement algorithm as per http://www.catalysoft.com/articles/StrikeAMatch.html
///
/// Only adjacent pairs of alphabetic characters contribute, compared case-insensitively;
/// each distinct pair is counted at most once per string.
///
/// @param first One of the two strings to compare.
/// @param second The other string to compare.
/// @returns A number in the range 0.0 to 1.0 indicating the similarity between two strings;
/// 1.0 is most similar, 0.0 is least. 0.0 is also returned if either string is shorter than
/// two characters, or if neither string contains any alphabetic pair.
inline double similarity(std::string_view first, std::string_view second) {
	if(first.size() < 2 || second.size() < 2) {
		return 0.0;
	}

	// Collects the set of upper-cased alphabetic character pairs in @c source, each packed into 16 bits.
	const auto pairs = [](std::string_view source) -> std::set<uint16_t> {
		std::set<uint16_t> result;
		for(std::size_t c = 0; c < source.size() - 1; c++) {
			// The <cctype> functions have undefined behaviour if handed a negative value
			// other than EOF, so route each character through unsigned char first.
			const auto left = static_cast<unsigned char>(source[c]);
			const auto right = static_cast<unsigned char>(source[c+1]);

			if(std::isalpha(left) && std::isalpha(right)) {
				result.insert(static_cast<uint16_t>(
					(std::toupper(left) << 8) |
					std::toupper(right)
				));
			}
		}
		return result;
	};

	const auto first_pairs = pairs(first);
	const auto second_pairs = pairs(second);

	// With no pairs on either side the ratio below would be 0/0, i.e. NaN; report
	// 'least similar' instead so that callers can safely order by the result.
	const std::size_t total_pairs = first_pairs.size() + second_pairs.size();
	if(!total_pairs) {
		return 0.0;
	}

	// Both sets iterate in ascending order, so count the intersection with a single merge-style pass.
	std::size_t numerator = 0;
	auto first_it = first_pairs.begin();
	auto second_it = second_pairs.begin();
	while(first_it != first_pairs.end() && second_it != second_pairs.end()) {
		if(*first_it == *second_it) {
			++numerator;
			++first_it;
			++second_it;
		} else if(*first_it < *second_it) {
			++first_it;
		} else {
			++second_it;
		}
	}

	return static_cast<double>(numerator * 2) / static_cast<double>(total_pairs);
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* StringSimilarity_hpp */
|
@ -2268,6 +2268,7 @@
|
||||
4BD9137D1F311BC5009BCF85 /* i8255.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = i8255.hpp; sourceTree = "<group>"; };
|
||||
4BD91D762401C2B8007BDC91 /* PatrikRakTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PatrikRakTests.swift; sourceTree = "<group>"; };
|
||||
4BD971382BFC3D9C00C907AA /* ArchimedesStaticAnalyserTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ArchimedesStaticAnalyserTests.mm; sourceTree = "<group>"; };
|
||||
4BD9713A2BFD7E7100C907AA /* StringSimilarity.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = StringSimilarity.hpp; sourceTree = "<group>"; };
|
||||
4BDA00D922E60EE300AC3CD0 /* ROMRequester.xib */ = {isa = PBXFileReference; lastKnownFileType = file.xib; path = ROMRequester.xib; sourceTree = "<group>"; };
|
||||
4BDA00DE22E644AF00AC3CD0 /* CSROMReceiverView.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CSROMReceiverView.h; sourceTree = "<group>"; };
|
||||
4BDA00DF22E644AF00AC3CD0 /* CSROMReceiverView.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = CSROMReceiverView.m; sourceTree = "<group>"; };
|
||||
@ -3528,6 +3529,7 @@
|
||||
4BB5B995281B1D3E00522DA9 /* RegisterSizes.hpp */,
|
||||
4BFEA2F12682A90200EBF94C /* Sizes.hpp */,
|
||||
4281572E2AA0334300E16AA1 /* Carry.hpp */,
|
||||
4BD9713A2BFD7E7100C907AA /* StringSimilarity.hpp */,
|
||||
);
|
||||
name = Numeric;
|
||||
path = ../../Numeric;
|
||||
|
Loading…
x
Reference in New Issue
Block a user