mirror of
https://github.com/badvision/lawless-legends.git
synced 2025-02-20 05:29:09 +00:00
Better spelling suggestions, using a secondary algorithm as a tie-breaker to favor the most relevant suggestions
This commit is contained in:
parent
287ceca85f
commit
0ca78ec5f8
@ -143,7 +143,7 @@ public class DataUtilities {
|
||||
}
|
||||
}
|
||||
}
|
||||
return Math.max(m, n) - dist[m][n];
|
||||
return dist[m][n];
|
||||
}
|
||||
|
||||
/**
|
||||
@ -155,7 +155,7 @@ public class DataUtilities {
|
||||
* @param c1
|
||||
* @param c2
|
||||
* @param width Search window size
|
||||
* @return Overall similarity score (higher is beter)
|
||||
* @return Overall similarity score (higher is better)
|
||||
*/
|
||||
public static double rankMatch(String c1, String c2, int width) {
|
||||
double score = 0;
|
||||
|
@ -28,7 +28,6 @@ import org.badvision.outlaweditor.data.DataUtilities;
|
||||
*/
|
||||
public class SpellChecker {
|
||||
private static HashMap<Character, Set<String>> dictionary;
|
||||
private final double SIMILARITY_THRESHOLD = 0.5;
|
||||
|
||||
public SpellChecker() {
|
||||
loadDictionary();
|
||||
@ -41,15 +40,10 @@ public class SpellChecker {
|
||||
for (String word : words) {
|
||||
Set<Suggestion> suggestions = getSuggestions(word);
|
||||
if (suggestions != null && !suggestions.isEmpty()) {
|
||||
Suggestion first = suggestions.stream().findFirst().get();
|
||||
if (first.similarity == 1.0) {
|
||||
continue;
|
||||
} else {
|
||||
SpellResponse.Source source = new SpellResponse.Source();
|
||||
source.start = pos;
|
||||
source.word = word;
|
||||
response.corrections.put(source, suggestions);
|
||||
}
|
||||
SpellResponse.Source source = new SpellResponse.Source();
|
||||
source.start = pos;
|
||||
source.word = word;
|
||||
response.corrections.put(source, suggestions);
|
||||
}
|
||||
|
||||
pos += word.length() + 1;
|
||||
@ -86,16 +80,18 @@ public class SpellChecker {
|
||||
String lower = word.toLowerCase();
|
||||
Character first = lower.charAt(0);
|
||||
Set<String> words = dictionary.get(first);
|
||||
int length = lower.length();
|
||||
double threshold = length <= 2 ? 0 : Math.log(length-1) * 1.75;
|
||||
if (words != null) {
|
||||
if (words.contains(lower)) {
|
||||
if (lower.length() <= 2 || words.contains(lower)) {
|
||||
return null;
|
||||
}
|
||||
words.parallelStream().forEach((String dictWord) -> {
|
||||
int distance = DataUtilities.levenshteinDistance(lower, dictWord);
|
||||
double similarity = distance / ((double) Math.max(lower.length(), dictWord.length()));
|
||||
if (similarity >= SIMILARITY_THRESHOLD) {
|
||||
if (distance <= threshold) {
|
||||
Suggestion suggestion = new Suggestion();
|
||||
suggestion.similarity = similarity;
|
||||
suggestion.original = lower;
|
||||
suggestion.similarity = distance;
|
||||
suggestion.word = dictWord;
|
||||
suggestions.add(suggestion);
|
||||
}
|
||||
|
@ -9,18 +9,43 @@
|
||||
*/
|
||||
package org.badvision.outlaweditor.spelling;
|
||||
|
||||
import static org.badvision.outlaweditor.data.DataUtilities.rankMatch;
|
||||
|
||||
public class Suggestion implements Comparable<Suggestion> {
|
||||
|
||||
public String original;
|
||||
public String word;
|
||||
public double similarity;
|
||||
public double similarity;
|
||||
private double similarityRank = -1;
|
||||
|
||||
public String getWord() {
|
||||
return word;
|
||||
}
|
||||
|
||||
public double getSimilarity() {
|
||||
return similarity;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(Suggestion o) {
|
||||
return (int) Math.signum(o.similarity - similarity);
|
||||
if (similarity == o.similarity) {
|
||||
|
||||
double rank1 = getSimilarityRank();
|
||||
double rank2 = o.getSimilarityRank();
|
||||
if (rank1 == rank2) {
|
||||
return (word.compareTo(o.word));
|
||||
} else {
|
||||
// Normalize result to -1, 0 or 1 so there are no rounding issues!
|
||||
return (int) Math.signum(rank2 - rank1);
|
||||
}
|
||||
}
|
||||
return (int) Math.signum(similarity - o.similarity);
|
||||
}
|
||||
|
||||
private double getSimilarityRank() {
|
||||
if (similarityRank < 0) {
|
||||
similarityRank = rankMatch(word, original, 3) + rankMatch(word, original, 2);
|
||||
}
|
||||
return similarityRank;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user