mirror of https://github.com/ksherlock/ample.git
Software search improvements
1. Handle diacritics. 2. Strip a leading "the " from titles and from the search string, since its presence is often inconsistent.
This commit is contained in:
parent
9bb0cc468a
commit
2b2f3a2fd8
|
@ -32,6 +32,8 @@
|
||||||
|
|
||||||
-(NSString *)fullName;
|
-(NSString *)fullName;
|
||||||
|
|
||||||
|
@property NSString *searchTitle;
|
||||||
|
|
||||||
@end
|
@end
|
||||||
|
|
||||||
@interface SoftwareSet : NSObject <NSFastEnumeration, AutoCompleteDelegate>
|
@interface SoftwareSet : NSObject <NSFastEnumeration, AutoCompleteDelegate>
|
||||||
|
|
|
@ -614,14 +614,26 @@ NSArray<SoftwareList *> *SoftwareListForMachine(NSString *machine) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// NSStringTransformStripDiacritics
|
|
||||||
// pre-process all entries to lowercase and remove diacritics (second string for search text?)
|
static NSString *SearchString(NSString *s) {
|
||||||
#if 0
|
|
||||||
static unichar diacritics[][2] = {
|
s = [s stringByFoldingWithOptions: NSCaseInsensitiveSearch|NSWidthInsensitiveSearch|NSDiacriticInsensitiveSearch locale: nil];
|
||||||
{ 0xd8, 'O' }, // Ø
|
|
||||||
{ 0xf8, 'o' }, // ø
|
// strip leading "the " ???
|
||||||
};
|
NSUInteger l = [s length];
|
||||||
#endif
|
if (l > 4) {
|
||||||
|
unichar buffer[4];
|
||||||
|
static unichar the_l[4] = { 't', 'h', 'e', ' '};
|
||||||
|
static unichar the_u[4] = { 'T', 'H', 'E', ' '};
|
||||||
|
|
||||||
|
[s getCharacters: buffer range: NSMakeRange(0, 4)];
|
||||||
|
if (!memcmp(buffer, the_l, sizeof(buffer))) return [s substringFromIndex: 4];
|
||||||
|
if (!memcmp(buffer, the_u, sizeof(buffer))) return [s substringFromIndex: 4];
|
||||||
|
}
|
||||||
|
if (l > 256) return [s substringToIndex: 256];
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
- (nonnull NSArray<id<AutocompleteItem>> *)autocomplete:(nonnull AutocompleteControl *)control completionsForString:(nonnull NSString *)string {
|
- (nonnull NSArray<id<AutocompleteItem>> *)autocomplete:(nonnull AutocompleteControl *)control completionsForString:(nonnull NSString *)string {
|
||||||
|
|
||||||
if (!_cache) {
|
if (!_cache) {
|
||||||
|
@ -629,27 +641,29 @@ static unichar diacritics[][2] = {
|
||||||
[_cache setCountLimit: 10];
|
[_cache setCountLimit: 10];
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo -- diacritic normalization.
|
|
||||||
// déjá vu -> deja vu
|
|
||||||
|
|
||||||
|
// diacritic normalization.
|
||||||
|
// déjá vu -> deja vu
|
||||||
|
string = SearchString(string);
|
||||||
|
|
||||||
enum { max_haystack_length = 256, max_needle_length = 256 };
|
enum { max_haystack_length = 256, max_needle_length = 256 };
|
||||||
|
|
||||||
unichar needle_data[max_needle_length];
|
unichar needle_data[max_needle_length];
|
||||||
|
|
||||||
if (!_items) return @[];
|
if (!_items) return @[];
|
||||||
|
|
||||||
//string = [string stringByApplyingTransform: NSStringTransformStripDiacritics reverse: NO];
|
|
||||||
|
|
||||||
NSUInteger needle_length = [string length];
|
NSUInteger needle_length = [string length];
|
||||||
needle_length = MIN(needle_length, max_needle_length);
|
needle_length = MIN(needle_length, max_needle_length);
|
||||||
|
|
||||||
[string getCharacters: needle_data range: NSMakeRange(0, needle_length)];
|
[string getCharacters: needle_data range: NSMakeRange(0, needle_length)];
|
||||||
|
|
||||||
|
// based on testing, NSCaseInsensitiveSearch uses lowercase but it's not guaranteed.
|
||||||
for (NSUInteger i = 0; i < needle_length; ++i)
|
for (NSUInteger i = 0; i < needle_length; ++i)
|
||||||
needle_data[i] = towlower(needle_data[i]);
|
needle_data[i] = towlower(needle_data[i]);
|
||||||
|
|
||||||
string = InternString([NSString stringWithCharacters: needle_data length: needle_length]);
|
string = InternString([NSString stringWithCharacters: needle_data length: needle_length]);
|
||||||
|
|
||||||
|
|
||||||
NSArray *a = [_cache objectForKey: string];
|
NSArray *a = [_cache objectForKey: string];
|
||||||
if (a) return a;
|
if (a) return a;
|
||||||
|
|
||||||
|
@ -688,7 +702,12 @@ static unichar diacritics[][2] = {
|
||||||
if (!memcmp(haystack_data, needle_data_ptr, needle_length * sizeof(unichar))) return YES;
|
if (!memcmp(haystack_data, needle_data_ptr, needle_length * sizeof(unichar))) return YES;
|
||||||
}
|
}
|
||||||
|
|
||||||
haystack = [o title];
|
haystack = [o searchTitle];
|
||||||
|
if (!haystack) {
|
||||||
|
haystack = SearchString([o title]);
|
||||||
|
[o setSearchTitle: haystack];
|
||||||
|
}
|
||||||
|
|
||||||
length = [haystack length];
|
length = [haystack length];
|
||||||
length = MIN(length, max_haystack_length);
|
length = MIN(length, max_haystack_length);
|
||||||
if (length >= needle_length) {
|
if (length >= needle_length) {
|
||||||
|
|
Loading…
Reference in New Issue