mirror of
https://github.com/badvision/lawless-legends.git
synced 2025-02-10 01:31:29 +00:00
Efficiency improvements in spell-checker.
1) Spell check is not run if the value remains unchanged. 2) Fast-failure added to the distance calculation to throw out words whose lengths differ by more than the limit. 3) Fast-failure added to the distance calculation to exit as soon as the max threshold is exceeded.
This commit is contained in:
parent
1032797515
commit
5634548879
@ -113,11 +113,13 @@ public class DataUtilities {
|
||||
*
|
||||
* @param s
|
||||
* @param t
|
||||
* @param limit Maximum distance of interest; the calculation stops early once the distance is known to exceed it
|
||||
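* @return Edit distance between s and t (lower means a closer match); if the calculation stops early, a value greater than limit is returned instead of the exact distance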
|
||||
*/
|
||||
public static int levenshteinDistance(String s, String t) {
|
||||
if (s == null || t == null || s.length() == 0 || t.length() == 0) {
|
||||
return -1;
|
||||
public static int levenshteinDistance(String s, String t, int limit) {
|
||||
int sizeDiff = Math.abs(s.length() - t.length());
|
||||
if (sizeDiff > limit) {
|
||||
return sizeDiff;
|
||||
}
|
||||
|
||||
s = s.toLowerCase().replaceAll("[^a-zA-Z0-9\\s]", "");
|
||||
@ -132,6 +134,7 @@ public class DataUtilities {
|
||||
dist[0][i] = i;
|
||||
}
|
||||
for (int j = 1; j <= n; j++) {
|
||||
int min = 100;
|
||||
for (int i = 1; i <= m; i++) {
|
||||
if (s.charAt(i - 1) == t.charAt(j - 1)) {
|
||||
dist[i][j] = dist[i - 1][j - 1];
|
||||
@ -141,6 +144,10 @@ public class DataUtilities {
|
||||
int sub = dist[i - 1][j - 1] + 1;
|
||||
dist[i][j] = Math.min(Math.min(del, insert), sub);
|
||||
}
|
||||
min = Math.min(min, dist[i][j]);
|
||||
}
|
||||
if (min > limit) {
|
||||
return min;
|
||||
}
|
||||
}
|
||||
return dist[m][n];
|
||||
@ -191,8 +198,8 @@ public class DataUtilities {
|
||||
|
||||
@Override
|
||||
public int compare(String o1, String o2) {
|
||||
double s1 = levenshteinDistance(match, o1);
|
||||
double s2 = levenshteinDistance(match, o2);
|
||||
double s1 = levenshteinDistance(match, o1,20);
|
||||
double s2 = levenshteinDistance(match, o2,20);
|
||||
if (s2 == s1) {
|
||||
s1 = rankMatch(o1, match, 3) + rankMatch(o1, match, 2);
|
||||
s2 = rankMatch(o2, match, 3) + rankMatch(o2, match, 2);
|
||||
@ -224,7 +231,7 @@ public class DataUtilities {
|
||||
RankingComparator r = new RankingComparator(match);
|
||||
List<String> candidates = new ArrayList<>(search);
|
||||
Collections.sort(candidates, r);
|
||||
double score = levenshteinDistance(match, candidates.get(0));
|
||||
double score = levenshteinDistance(match, candidates.get(0), 20);
|
||||
if (score > 1) {
|
||||
return candidates.get(0);
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ public class SpellChecker {
|
||||
return null;
|
||||
}
|
||||
words.parallelStream().forEach((String dictWord) -> {
|
||||
int distance = DataUtilities.levenshteinDistance(lower, dictWord);
|
||||
int distance = DataUtilities.levenshteinDistance(lower, dictWord, (int) threshold);
|
||||
if (distance <= threshold) {
|
||||
Suggestion suggestion = new Suggestion();
|
||||
suggestion.original = lower;
|
||||
@ -96,6 +96,13 @@ public class SpellChecker {
|
||||
suggestions.add(suggestion);
|
||||
}
|
||||
});
|
||||
if (suggestions.isEmpty()) {
|
||||
Suggestion suggestion = new Suggestion();
|
||||
suggestion.original = lower;
|
||||
suggestion.similarity = 100;
|
||||
suggestion.word = "????";
|
||||
suggestions.add(suggestion);
|
||||
}
|
||||
}
|
||||
|
||||
return suggestions;
|
||||
|
@ -502,9 +502,12 @@ if (typeof Mythos === "undefined") {
|
||||
}
|
||||
return new Blockly.FieldImage(file, 12, 12, '"');
|
||||
},
|
||||
checkSpelling: function(value) {
|
||||
checkSpelling: function(value) {
|
||||
if (this.sourceBlock_) {
|
||||
this.sourceBlock_.setCommentText(Mythos.editor.checkSpelling(value));
|
||||
if (value !== this.lastSpellCheck_) {
|
||||
this.sourceBlock_.setCommentText(Mythos.editor.checkSpelling(value));
|
||||
}
|
||||
this.lastSpellCheck_ = value;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user