diff --git a/OutlawEditor/src/main/java/org/badvision/outlaweditor/data/DataUtilities.java b/OutlawEditor/src/main/java/org/badvision/outlaweditor/data/DataUtilities.java index 5527fa09..6ad733bf 100644 --- a/OutlawEditor/src/main/java/org/badvision/outlaweditor/data/DataUtilities.java +++ b/OutlawEditor/src/main/java/org/badvision/outlaweditor/data/DataUtilities.java @@ -143,7 +143,7 @@ public class DataUtilities { } } } - return Math.max(m, n) - dist[m][n]; + return dist[m][n]; } /** @@ -155,7 +155,7 @@ public class DataUtilities { * @param c1 * @param c2 * @param width Search window size - * @return Overall similarity score (higher is beter) + * @return Overall similarity score (higher is better) */ public static double rankMatch(String c1, String c2, int width) { double score = 0; diff --git a/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/SpellChecker.java b/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/SpellChecker.java index 29e9033d..43965706 100644 --- a/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/SpellChecker.java +++ b/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/SpellChecker.java @@ -28,7 +28,6 @@ import org.badvision.outlaweditor.data.DataUtilities; */ public class SpellChecker { private static HashMap> dictionary; - private final double SIMILARITY_THRESHOLD = 0.5; public SpellChecker() { loadDictionary(); @@ -41,15 +40,10 @@ public class SpellChecker { for (String word : words) { Set suggestions = getSuggestions(word); if (suggestions != null && !suggestions.isEmpty()) { - Suggestion first = suggestions.stream().findFirst().get(); - if (first.similarity == 1.0) { - continue; - } else { - SpellResponse.Source source = new SpellResponse.Source(); - source.start = pos; - source.word = word; - response.corrections.put(source, suggestions); - } + SpellResponse.Source source = new SpellResponse.Source(); + source.start = pos; + source.word = word; + response.corrections.put(source, 
suggestions); } pos += word.length() + 1; @@ -86,16 +80,18 @@ public class SpellChecker { String lower = word.toLowerCase(); Character first = lower.charAt(0); Set words = dictionary.get(first); + int length = lower.length(); + double threshold = length <= 2 ? 0 : Math.log(length-1) * 1.75; if (words != null) { - if (words.contains(lower)) { + if (lower.length() <= 2 || words.contains(lower)) { return null; } words.parallelStream().forEach((String dictWord) -> { int distance = DataUtilities.levenshteinDistance(lower, dictWord); - double similarity = distance / ((double) Math.max(lower.length(), dictWord.length())); - if (similarity >= SIMILARITY_THRESHOLD) { + if (distance <= threshold) { Suggestion suggestion = new Suggestion(); - suggestion.similarity = similarity; + suggestion.original = lower; + suggestion.similarity = distance; suggestion.word = dictWord; suggestions.add(suggestion); } diff --git a/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/Suggestion.java b/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/Suggestion.java index 493fa14c..872bdf7d 100644 --- a/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/Suggestion.java +++ b/OutlawEditor/src/main/java/org/badvision/outlaweditor/spelling/Suggestion.java @@ -9,18 +9,43 @@ */ package org.badvision.outlaweditor.spelling; +import static org.badvision.outlaweditor.data.DataUtilities.rankMatch; + public class Suggestion implements Comparable { + + public String original; public String word; - public double similarity; + public double similarity; + private double similarityRank = -1; + public String getWord() { return word; } + public double getSimilarity() { return similarity; } @Override public int compareTo(Suggestion o) { - return (int) Math.signum(o.similarity - similarity); + if (similarity == o.similarity) { + + double rank1 = getSimilarityRank(); + double rank2 = o.getSimilarityRank(); + if (rank1 == rank2) { + return (word.compareTo(o.word)); + } else { + // 
Normalize result to -1, 0 or 1 so there are no rounding issues! + return (int) Math.signum(rank2 - rank1); + } + } + return (int) Math.signum(similarity - o.similarity); + } + + private double getSimilarityRank() { + if (similarityRank < 0) { + similarityRank = rankMatch(word, original, 3) + rankMatch(word, original, 2); + } + return similarityRank; + } }