mirror of
https://github.com/CamHenlin/MessagesForMacintosh.git
synced 2025-02-19 11:31:10 +00:00
139 lines
3.9 KiB
Plaintext
139 lines
3.9 KiB
Plaintext
// @flow strict
|
|
import naturalCompare from './naturalCompare';
|
|
|
|
/**
|
|
* Given an invalid input string and a list of valid options, returns a filtered
|
|
* list of valid options sorted based on their similarity with the input.
|
|
*/
|
|
export default function suggestionList(
|
|
input: string,
|
|
options: $ReadOnlyArray<string>,
|
|
): Array<string> {
|
|
const optionsByDistance = Object.create(null);
|
|
const lexicalDistance = new LexicalDistance(input);
|
|
|
|
const threshold = Math.floor(input.length * 0.4) + 1;
|
|
for (const option of options) {
|
|
const distance = lexicalDistance.measure(option, threshold);
|
|
if (distance !== undefined) {
|
|
optionsByDistance[option] = distance;
|
|
}
|
|
}
|
|
|
|
return Object.keys(optionsByDistance).sort((a, b) => {
|
|
const distanceDiff = optionsByDistance[a] - optionsByDistance[b];
|
|
return distanceDiff !== 0 ? distanceDiff : naturalCompare(a, b);
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Computes the lexical distance between strings A and B.
|
|
*
|
|
* The "distance" between two strings is given by counting the minimum number
|
|
* of edits needed to transform string A into string B. An edit can be an
|
|
* insertion, deletion, or substitution of a single character, or a swap of two
|
|
* adjacent characters.
|
|
*
|
|
* Includes a custom alteration from Damerau-Levenshtein to treat case changes
|
|
* as a single edit which helps identify mis-cased values with an edit distance
|
|
* of 1.
|
|
*
|
|
* This distance can be useful for detecting typos in input or sorting
|
|
*/
|
|
class LexicalDistance {
|
|
_input: string;
|
|
_inputLowerCase: string;
|
|
_inputArray: Array<number>;
|
|
_rows: [Array<number>, Array<number>, Array<number>];
|
|
|
|
constructor(input: string) {
|
|
this._input = input;
|
|
this._inputLowerCase = input.toLowerCase();
|
|
this._inputArray = stringToArray(this._inputLowerCase);
|
|
|
|
this._rows = [
|
|
new Array(input.length + 1).fill(0),
|
|
new Array(input.length + 1).fill(0),
|
|
new Array(input.length + 1).fill(0),
|
|
];
|
|
}
|
|
|
|
measure(option: string, threshold: number): number | void {
|
|
if (this._input === option) {
|
|
return 0;
|
|
}
|
|
|
|
const optionLowerCase = option.toLowerCase();
|
|
|
|
// Any case change counts as a single edit
|
|
if (this._inputLowerCase === optionLowerCase) {
|
|
return 1;
|
|
}
|
|
|
|
let a = stringToArray(optionLowerCase);
|
|
let b = this._inputArray;
|
|
|
|
if (a.length < b.length) {
|
|
const tmp = a;
|
|
a = b;
|
|
b = tmp;
|
|
}
|
|
const aLength = a.length;
|
|
const bLength = b.length;
|
|
|
|
if (aLength - bLength > threshold) {
|
|
return undefined;
|
|
}
|
|
|
|
const rows = this._rows;
|
|
for (let j = 0; j <= bLength; j++) {
|
|
rows[0][j] = j;
|
|
}
|
|
|
|
for (let i = 1; i <= aLength; i++) {
|
|
const upRow = rows[(i - 1) % 3];
|
|
const currentRow = rows[i % 3];
|
|
|
|
let smallestCell = (currentRow[0] = i);
|
|
for (let j = 1; j <= bLength; j++) {
|
|
const cost = a[i - 1] === b[j - 1] ? 0 : 1;
|
|
|
|
let currentCell = Math.min(
|
|
upRow[j] + 1, // delete
|
|
currentRow[j - 1] + 1, // insert
|
|
upRow[j - 1] + cost, // substitute
|
|
);
|
|
|
|
if (i > 1 && j > 1 && a[i - 1] === b[j - 2] && a[i - 2] === b[j - 1]) {
|
|
// transposition
|
|
const doubleDiagonalCell = rows[(i - 2) % 3][j - 2];
|
|
currentCell = Math.min(currentCell, doubleDiagonalCell + 1);
|
|
}
|
|
|
|
if (currentCell < smallestCell) {
|
|
smallestCell = currentCell;
|
|
}
|
|
|
|
currentRow[j] = currentCell;
|
|
}
|
|
|
|
// Early exit, since distance can't go smaller than smallest element of the previous row.
|
|
if (smallestCell > threshold) {
|
|
return undefined;
|
|
}
|
|
}
|
|
|
|
const distance = rows[aLength % 3][bLength];
|
|
return distance <= threshold ? distance : undefined;
|
|
}
|
|
}
|
|
|
|
function stringToArray(str: string): Array<number> {
|
|
const strLength = str.length;
|
|
const array = new Array(strLength);
|
|
for (let i = 0; i < strLength; ++i) {
|
|
array[i] = str.charCodeAt(i);
|
|
}
|
|
return array;
|
|
}
|