mirror of
https://github.com/badvision/lawless-legends.git
synced 2024-10-11 18:23:48 +00:00
Better spelling suggestions, using a secondary algorithm as a tie-breaker to favor the most relevant suggestions
This commit is contained in:
parent
287ceca85f
commit
0ca78ec5f8
@ -143,7 +143,7 @@ public class DataUtilities {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Math.max(m, n) - dist[m][n];
|
return dist[m][n];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -155,7 +155,7 @@ public class DataUtilities {
|
|||||||
* @param c1
|
* @param c1
|
||||||
* @param c2
|
* @param c2
|
||||||
* @param width Search window size
|
* @param width Search window size
|
||||||
* @return Overall similarity score (higher is beter)
|
* @return Overall similarity score (higher is better)
|
||||||
*/
|
*/
|
||||||
public static double rankMatch(String c1, String c2, int width) {
|
public static double rankMatch(String c1, String c2, int width) {
|
||||||
double score = 0;
|
double score = 0;
|
||||||
|
@ -28,7 +28,6 @@ import org.badvision.outlaweditor.data.DataUtilities;
|
|||||||
*/
|
*/
|
||||||
public class SpellChecker {
|
public class SpellChecker {
|
||||||
private static HashMap<Character, Set<String>> dictionary;
|
private static HashMap<Character, Set<String>> dictionary;
|
||||||
private final double SIMILARITY_THRESHOLD = 0.5;
|
|
||||||
|
|
||||||
public SpellChecker() {
|
public SpellChecker() {
|
||||||
loadDictionary();
|
loadDictionary();
|
||||||
@ -41,15 +40,10 @@ public class SpellChecker {
|
|||||||
for (String word : words) {
|
for (String word : words) {
|
||||||
Set<Suggestion> suggestions = getSuggestions(word);
|
Set<Suggestion> suggestions = getSuggestions(word);
|
||||||
if (suggestions != null && !suggestions.isEmpty()) {
|
if (suggestions != null && !suggestions.isEmpty()) {
|
||||||
Suggestion first = suggestions.stream().findFirst().get();
|
SpellResponse.Source source = new SpellResponse.Source();
|
||||||
if (first.similarity == 1.0) {
|
source.start = pos;
|
||||||
continue;
|
source.word = word;
|
||||||
} else {
|
response.corrections.put(source, suggestions);
|
||||||
SpellResponse.Source source = new SpellResponse.Source();
|
|
||||||
source.start = pos;
|
|
||||||
source.word = word;
|
|
||||||
response.corrections.put(source, suggestions);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pos += word.length() + 1;
|
pos += word.length() + 1;
|
||||||
@ -86,16 +80,18 @@ public class SpellChecker {
|
|||||||
String lower = word.toLowerCase();
|
String lower = word.toLowerCase();
|
||||||
Character first = lower.charAt(0);
|
Character first = lower.charAt(0);
|
||||||
Set<String> words = dictionary.get(first);
|
Set<String> words = dictionary.get(first);
|
||||||
|
int length = lower.length();
|
||||||
|
double threshold = length <= 2 ? 0 : Math.log(length-1) * 1.75;
|
||||||
if (words != null) {
|
if (words != null) {
|
||||||
if (words.contains(lower)) {
|
if (lower.length() <= 2 || words.contains(lower)) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
words.parallelStream().forEach((String dictWord) -> {
|
words.parallelStream().forEach((String dictWord) -> {
|
||||||
int distance = DataUtilities.levenshteinDistance(lower, dictWord);
|
int distance = DataUtilities.levenshteinDistance(lower, dictWord);
|
||||||
double similarity = distance / ((double) Math.max(lower.length(), dictWord.length()));
|
if (distance <= threshold) {
|
||||||
if (similarity >= SIMILARITY_THRESHOLD) {
|
|
||||||
Suggestion suggestion = new Suggestion();
|
Suggestion suggestion = new Suggestion();
|
||||||
suggestion.similarity = similarity;
|
suggestion.original = lower;
|
||||||
|
suggestion.similarity = distance;
|
||||||
suggestion.word = dictWord;
|
suggestion.word = dictWord;
|
||||||
suggestions.add(suggestion);
|
suggestions.add(suggestion);
|
||||||
}
|
}
|
||||||
|
@ -9,18 +9,43 @@
|
|||||||
*/
|
*/
|
||||||
package org.badvision.outlaweditor.spelling;
|
package org.badvision.outlaweditor.spelling;
|
||||||
|
|
||||||
|
import static org.badvision.outlaweditor.data.DataUtilities.rankMatch;
|
||||||
|
|
||||||
public class Suggestion implements Comparable<Suggestion> {
|
public class Suggestion implements Comparable<Suggestion> {
|
||||||
|
|
||||||
|
public String original;
|
||||||
public String word;
|
public String word;
|
||||||
public double similarity;
|
public double similarity;
|
||||||
|
private double similarityRank = -1;
|
||||||
|
|
||||||
public String getWord() {
|
public String getWord() {
|
||||||
return word;
|
return word;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getSimilarity() {
|
public double getSimilarity() {
|
||||||
return similarity;
|
return similarity;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(Suggestion o) {
|
public int compareTo(Suggestion o) {
|
||||||
return (int) Math.signum(o.similarity - similarity);
|
if (similarity == o.similarity) {
|
||||||
|
|
||||||
|
double rank1 = getSimilarityRank();
|
||||||
|
double rank2 = o.getSimilarityRank();
|
||||||
|
if (rank1 == rank2) {
|
||||||
|
return (word.compareTo(o.word));
|
||||||
|
} else {
|
||||||
|
// Normalize result to -1, 0 or 1 so there are no rounding issues!
|
||||||
|
return (int) Math.signum(rank2 - rank1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (int) Math.signum(similarity - o.similarity);
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getSimilarityRank() {
|
||||||
|
if (similarityRank < 0) {
|
||||||
|
similarityRank = rankMatch(word, original, 3) + rankMatch(word, original, 2);
|
||||||
|
}
|
||||||
|
return similarityRank;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user