Extract text matching to its own file.
This commit is contained in:
parent
f6d3169bfc
commit
77cd2b32c7
40
inc/match.h
Normal file
40
inc/match.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2022 Scoopta
|
||||
* This file is part of Wofi
|
||||
* Wofi is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Wofi is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Wofi. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef MATCH_H
|
||||
#define MATCH_H
|
||||
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
typedef double score_t;
|
||||
#define SCORE_MAX INFINITY
|
||||
#define SCORE_MIN -INFINITY
|
||||
#define MATCH_FUZZY_MAX_LEN 256
|
||||
#define MAX_MULTI_CONTAINS_FILTER_SIZE 256
|
||||
|
||||
/* Selects how entries are tested against, and ordered by, the filter text. */
enum matching_mode {
|
||||
MATCHING_MODE_CONTAINS, /* the filter must occur as a substring */
|
||||
MATCHING_MODE_MULTI_CONTAINS, /* every space-separated word of the filter must occur as a substring */
|
||||
MATCHING_MODE_FUZZY /* the filter's characters must occur in order, gaps allowed */
|
||||
};
|
||||
|
||||
int sort_for_matching_mode(const char *text1, const char *text2, int fallback,
|
||||
enum matching_mode match_type, const char *filter, bool insensitive);
|
||||
|
||||
bool match_for_matching_mode(const char* filter, const char* text, enum matching_mode matching, bool insensitive);
|
||||
#endif
|
@ -22,11 +22,6 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
typedef double score_t;
|
||||
#define SCORE_MAX INFINITY
|
||||
#define SCORE_MIN -INFINITY
|
||||
#define MATCH_FUZZY_MAX_LEN 256
|
||||
|
||||
time_t utils_get_time_millis(void);
|
||||
|
||||
void utils_sleep_millis(time_t millis);
|
||||
@ -39,8 +34,6 @@ size_t utils_min3(size_t n1, size_t n2, size_t n3);
|
||||
|
||||
size_t utils_distance(const char* haystack, const char* needle);
|
||||
|
||||
score_t utils_fuzzy_score(const char *haystack, const char *needle);
|
||||
|
||||
void utils_mkdir(char *path, mode_t mode);
|
||||
|
||||
#endif
|
||||
|
@ -23,6 +23,7 @@ add_project_link_arguments('-rdynamic', language : 'c')
|
||||
sources = ['src/config.c',
|
||||
'src/main.c',
|
||||
'src/map.c',
|
||||
'src/match.c',
|
||||
'src/property_box.c',
|
||||
'src/utils_g.c',
|
||||
'src/utils.c',
|
||||
|
412
src/match.c
Normal file
412
src/match.c
Normal file
@ -0,0 +1,412 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2022 Scoopta
|
||||
* This file is part of Wofi
|
||||
* Wofi is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Wofi is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Wofi. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <match.h>
|
||||
#include <string.h>
|
||||
|
||||
// leading gap
|
||||
#define SCORE_GAP_LEADING -0.005
|
||||
// trailing gap
|
||||
#define SCORE_GAP_TRAILING -0.005
|
||||
// gap in the middle
|
||||
#define SCORE_GAP_INNER -0.01
|
||||
// we matched the characters consecutively
|
||||
#define SCORE_MATCH_CONSECUTIVE 1.0
|
||||
// we got a consecutive match, but insensitive is on
|
||||
// and we didn't match the case.
|
||||
#define SCORE_MATCH_NOT_MATCH_CASE 0.9
|
||||
// we are matching after a slash
|
||||
#define SCORE_MATCH_SLASH 0.9
|
||||
// we are matching after a space, hyphen or underscore
|
||||
#define SCORE_MATCH_WORD 0.8
|
||||
// we are matching a camel case letter
|
||||
#define SCORE_MATCH_CAPITAL 0.7
|
||||
// we are matching after a dot
|
||||
#define SCORE_MATCH_DOT 0.6
|
||||
|
||||
#define SWAP(x, y, T) \
|
||||
do { \
|
||||
T SWAP = x; \
|
||||
x = y; \
|
||||
y = SWAP; \
|
||||
} while(0)
|
||||
|
||||
#define max(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
// matching
|
||||
/*
 * Substring test: reports whether `filter` occurs anywhere inside `text`.
 *
 * A NULL or empty filter accepts every text (including NULL); a NULL text
 * never matches a non-empty filter. When `insensitive` is true the search
 * ignores case.
 */
static bool contains_match(const char* filter, const char* text, bool insensitive) {
	const bool no_filter = filter == NULL || filter[0] == '\0';
	if(no_filter) {
		return true;
	}
	if(text == NULL) {
		return false;
	}

	const char* hit = insensitive ? strcasestr(text, filter) : strstr(text, filter);
	return hit != NULL;
}
|
||||
|
||||
/*
 * Locates the first occurrence of the character `c` in `s`.
 *
 * s:           NUL-terminated string to search.
 * c:           character to look for.
 * insensitive: when true, either case of `c` is accepted.
 *
 * Returns a pointer to the first matching character inside `s`, or NULL
 * when there is none.
 */
static char* strcasechr(const char* s, char c, bool insensitive) {
	if(!insensitive) {
		return strchr(s, c);
	}

	// <ctype.h> functions need a value representable as unsigned char.
	const unsigned char uc = (unsigned char) c;
	// List both cases explicitly: pairing `c` only with toupper(c) would
	// make an upper-case `c` miss its lower-case form in `s`.
	const char accept[3] = {(char) tolower(uc), (char) toupper(uc), '\0'};
	return strpbrk(s, accept);
}
|
||||
|
||||
/*
 * Subsequence test: reports whether every character of `filter` can be
 * found in `text`, in the same order, with arbitrary gaps in between.
 *
 * A NULL or empty filter accepts every text; a NULL text never matches a
 * non-empty filter. Per-character case handling is delegated to
 * strcasechr() via `insensitive`.
 */
static bool fuzzy_match(const char* filter, const char* text, bool insensitive) {
	if(filter == NULL || *filter == '\0') {
		return true;
	}
	if(text == NULL) {
		return false;
	}

	// Each filter character must be found at or after the position just
	// past the previous hit.
	const char* cursor = text;
	for(const char* wanted = filter; *wanted != '\0'; ++wanted) {
		const char* hit = strcasechr(cursor, *wanted, insensitive);
		if(hit == NULL) {
			return false;
		}
		cursor = hit + 1;
	}
	return true;
}
|
||||
|
||||
static bool multi_contains_match(const char* filter, const char* text, bool insensitive) {
|
||||
if(filter == NULL || strcmp(filter, "") == 0) {
|
||||
return true;
|
||||
}
|
||||
if(text == NULL) {
|
||||
return false;
|
||||
}
|
||||
char new_filter[MAX_MULTI_CONTAINS_FILTER_SIZE];
|
||||
strncpy(new_filter, filter, sizeof(new_filter));
|
||||
new_filter[sizeof(new_filter) - 1] = '\0';
|
||||
char* token;
|
||||
char* rest = new_filter;
|
||||
while((token = strtok_r(rest, " ", &rest))) {
|
||||
if(contains_match(token, text, insensitive) == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool match_for_matching_mode(const char* filter, const char* text,
|
||||
enum matching_mode matching, bool insensitive) {
|
||||
bool retval;
|
||||
switch(matching) {
|
||||
case MATCHING_MODE_MULTI_CONTAINS:
|
||||
retval = multi_contains_match(filter, text, insensitive);
|
||||
break;
|
||||
case MATCHING_MODE_CONTAINS:
|
||||
retval = contains_match(filter, text, insensitive);
|
||||
break;
|
||||
case MATCHING_MODE_FUZZY:
|
||||
retval = fuzzy_match(filter, text, insensitive);
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// end matching
|
||||
|
||||
// fuzzy matching
|
||||
/*
 * Fills match_bonus[i] with the bonus earned by matching haystack[i],
 * based on the character that precedes it.
 *
 * haystack:    NUL-terminated candidate string.
 * match_bonus: output array; must have room for strlen(haystack) entries.
 *
 * Only alphanumeric characters earn a bonus:
 *   - at the start of the string or after '/':  SCORE_MATCH_SLASH
 *   - after '-', '_' or ' ':                    SCORE_MATCH_WORD
 *   - upper-case letter after a lower-case one: SCORE_MATCH_CAPITAL
 *   - after '.':                                SCORE_MATCH_DOT
 * Every other position gets 0.
 */
static void precompute_bonus(const char* haystack, score_t* match_bonus) {
	char prev = '\0';
	for(size_t i = 0; haystack[i] != '\0'; i++) {
		const char ch = haystack[i];
		score_t bonus = 0;

		// isalnum() is only defined for values representable as unsigned
		// char, so bytes >= 0x80 must not be passed as a negative char.
		if(isalnum((unsigned char) ch)) {
			if(prev == '\0' || prev == '/') {
				bonus = SCORE_MATCH_SLASH;
			} else if(prev == '-' || prev == '_' || prev == ' ') {
				bonus = SCORE_MATCH_WORD;
			} else if(prev >= 'a' && prev <= 'z' && ch >= 'A' && ch <= 'Z') {
				/* CamelCase */
				bonus = SCORE_MATCH_CAPITAL;
			} else if(prev == '.') {
				bonus = SCORE_MATCH_DOT;
			}
		}

		match_bonus[i] = bonus;
		prev = ch;
	}
}
|
||||
|
||||
/*
 * Compares two characters, optionally ignoring case.
 *
 * Returns true when `a` and `b` are equal, or, if `insensitive` is set,
 * when they are equal after lower-casing both.
 */
static inline bool match_with_case(char a, char b, bool insensitive) {
	if(!insensitive) {
		return a == b;
	}
	// tolower() is only defined for values representable as unsigned char,
	// so convert first instead of passing a possibly negative char.
	return tolower((unsigned char) a) == tolower((unsigned char) b);
}
|
||||
|
||||
/*
 * Computes one row of the scoring tables for needle[row].
 *
 * curr_D / curr_M: output rows; m entries each are written.
 * last_D / last_M: the rows produced for needle[row - 1] (read only when
 *                  row > 0).
 * n, m:            lengths of needle and haystack.
 * match_bonus:     per-position bonuses for the haystack.
 *
 * curr_D[j] receives the best score of an alignment that matches
 * needle[row] exactly at haystack[j] (SCORE_MIN if the characters differ);
 * curr_M[j] receives the best score over all positions up to j, with a gap
 * penalty applied for every position skipped.
 */
static inline void match_row(int row, score_t* curr_D, score_t* curr_M,
		const score_t* last_D, const score_t* last_M,
		const char* needle, const char* haystack, int n, int m, score_t* match_bonus, bool insensitive) {
	const char wanted = needle[row];
	const bool first_row = row == 0;
	// gaps after the last needle character are cheaper than inner gaps
	const score_t gap_penalty = (row == n - 1) ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
	score_t running = SCORE_MIN;

	for(int col = 0; col < m; col++) {
		// score obtained by skipping this haystack position
		const score_t carried = running + gap_penalty;

		if(!match_with_case(wanted, haystack[col], insensitive)) {
			curr_D[col] = SCORE_MIN;
			curr_M[col] = running = carried;
			continue;
		}

		score_t hit = SCORE_MIN;
		if(first_row) {
			// first needle character: pay for the leading gap only
			hit = (col * SCORE_GAP_LEADING) + match_bonus[col];
		} else if(col > 0) {
			// the characters already compare equal under the active rule,
			// so a byte-wise difference can only be a difference in case
			const score_t step = (wanted == haystack[col]) ? SCORE_MATCH_CONSECUTIVE : SCORE_MATCH_NOT_MATCH_CASE;
			const score_t after_gap = last_M[col - 1] + match_bonus[col];
			// consecutive match, doesn't stack with match_bonus
			const score_t extend = last_D[col - 1] + step;
			hit = (after_gap > extend) ? after_gap : extend;
		}

		curr_D[col] = hit;
		curr_M[col] = running = (hit > carried) ? hit : carried;
	}
}
|
||||
|
||||
// Fuzzy matching scoring. Adapted from
|
||||
// https://github.com/jhawthorn/fzy/blob/master/src/match.c and
|
||||
// https://github.com/jhawthorn/fzy/blob/master/ALGORITHM.md
|
||||
// For a fuzzy match string needle being searched for in haystack we provide a
|
||||
// number score for how well we match.
|
||||
// We create two matrices of size needle_len (n) by haystack_len (m).
|
||||
// The first matrix is the score matrix. Each position (i,j) within this matrix
|
||||
// consists of the score that corresponds to the score that would be generated
|
||||
// by matching the first i characters of the needle with the first j
|
||||
// characters of the haystack. Gaps have a fixed penalty for having a gap along
|
||||
// with a linear penalty for gap size (c.f. gotoh's algorithm).
|
||||
// matches give a positive score, with a slight weight given to matches after
|
||||
// certain special characters (i.e. the first character after a `/` will be
|
||||
// "almost" consecutive but lower than an actual consecutive match).
|
||||
// Our second matrix is our diagonal matrix where we store the best match
|
||||
// that ends at a match. This allows us to calculate our gap penalties alongside
|
||||
// our consecutive match scores.
|
||||
// In addition, since we only rely on the current, and previous row of the
|
||||
// matrices and we only want to compute the score, we only store those scores
|
||||
// and reuse the previous rows (rather than storing the entire (n*m) matrix).
|
||||
// In addition we've simplified some of the algorithm compared to fzy to
|
||||
// improve legibility. (Can reimplement lookup tables later if wanted.)
|
||||
// Also, the reference algorithm does not take into account case sensitivity
|
||||
// which has been implemented here.
|
||||
|
||||
static score_t fuzzy_score(const char* haystack, const char* needle, bool insensitive) {
|
||||
if(!*needle)
|
||||
return SCORE_MIN;
|
||||
|
||||
int n = strlen(needle);
|
||||
int m = strlen(haystack);
|
||||
score_t match_bonus[m];
|
||||
precompute_bonus(haystack, match_bonus);
|
||||
|
||||
if(m > MATCH_FUZZY_MAX_LEN || n > m) {
|
||||
/*
|
||||
* Unreasonably large candidate: return no score
|
||||
* If it is a valid match it will still be returned, it will
|
||||
* just be ranked below any reasonably sized candidates
|
||||
*/
|
||||
return SCORE_MIN;
|
||||
} else if(n == m) {
|
||||
/* Since this method can only be called with a haystack which
|
||||
* matches needle. If the lengths of the strings are equal the
|
||||
* strings themselves must also be equal (ignoring case).
|
||||
*/
|
||||
return SCORE_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* D[][] Stores the best score for this position ending with a match.
|
||||
* M[][] Stores the best possible score at this position.
|
||||
*/
|
||||
score_t D[2][MATCH_FUZZY_MAX_LEN], M[2][MATCH_FUZZY_MAX_LEN];
|
||||
|
||||
score_t* last_D, *last_M;
|
||||
score_t* curr_D, *curr_M;
|
||||
|
||||
last_D = D[0];
|
||||
last_M = M[0];
|
||||
curr_D = D[1];
|
||||
curr_M = M[1];
|
||||
|
||||
for(int i = 0; i < n; i++) {
|
||||
match_row(i, curr_D, curr_M, last_D, last_M, needle, haystack, n, m, match_bonus, insensitive);
|
||||
|
||||
SWAP(curr_D, last_D, score_t *);
|
||||
SWAP(curr_M, last_M, score_t *);
|
||||
}
|
||||
|
||||
return last_M[m - 1];
|
||||
}
|
||||
// end fuzzy matching
|
||||
|
||||
// sorting
|
||||
static int fuzzy_sort(const char* text1, const char* text2, const char* filter, bool insensitive) {
|
||||
bool match1 = fuzzy_match(filter, text1, insensitive);
|
||||
bool match2 = fuzzy_match(filter, text2, insensitive);
|
||||
// both filters match do fuzzy scoring
|
||||
if(match1 && match2) {
|
||||
score_t dist1 = fuzzy_score(text1, filter, insensitive);
|
||||
score_t dist2 = fuzzy_score(text2, filter, insensitive);
|
||||
if(dist1 == dist2) {
|
||||
// same same
|
||||
return 0;
|
||||
} else if(dist1 > dist2) { // highest score wins.
|
||||
// text1 goes first
|
||||
return -1;
|
||||
} else {
|
||||
// text2 goes first
|
||||
return 1;
|
||||
}
|
||||
} else if(match1) {
|
||||
// text1 goes first
|
||||
return -1;
|
||||
} else if(match2) {
|
||||
// text2 goes first
|
||||
return 1;
|
||||
} else {
|
||||
// same same.
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// we sort based on how early in the string all the matches are.
|
||||
// if there are matches for each.
|
||||
static int multi_contains_sort(const char* text1, const char* text2, const char* filter, bool insensitive) {
|
||||
// sum of string positions of each match
|
||||
int t1_count = 0;
|
||||
int t2_count = 0;
|
||||
// does this string match with mult-contains
|
||||
bool t1_match = true;
|
||||
bool t2_match = true;
|
||||
|
||||
char new_filter[MAX_MULTI_CONTAINS_FILTER_SIZE];
|
||||
strncpy(new_filter, filter, sizeof(new_filter));
|
||||
new_filter[sizeof(new_filter) - 1] = '\0';
|
||||
|
||||
char* token;
|
||||
char* rest = new_filter;
|
||||
while((token = strtok_r(rest, " ", &rest))) {
|
||||
char* str1, *str2;
|
||||
if(insensitive) {
|
||||
str1 = strcasestr(text1, token);
|
||||
str2 = strcasestr(text2, token);
|
||||
} else {
|
||||
str1 = strstr(text1, token);
|
||||
str2 = strstr(text2, token);
|
||||
}
|
||||
t1_match = t1_match && str1 != NULL;
|
||||
t2_match = t2_match && str2 != NULL;
|
||||
if(str1 != NULL) {
|
||||
int pos1 = str1 - text1;
|
||||
t1_count += pos1;
|
||||
}
|
||||
if(str2 != NULL) {
|
||||
int pos2 = str2 - text2;
|
||||
t2_count += pos2;
|
||||
}
|
||||
}
|
||||
if(t1_match && t2_match) {
|
||||
// both match
|
||||
// return the one with the smallest count.
|
||||
return t1_count - t2_count;
|
||||
} else if(t1_match) {
|
||||
return -1;
|
||||
} else if(t2_match) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
/*
 * Orders two texts for contains mode.
 *
 * Each text is ranked by how the filter occurs in it: at the very start
 * (best), somewhere else, or not at all (worst).
 *
 * Returns -1 when text1 ranks better, 1 when text2 ranks better and 0
 * when both rank the same. A NULL text, or a NULL filter, is ranked as
 * "not at all" instead of being passed to the search functions.
 */
static int contains_sort(const char* text1, const char* text2, const char* filter, bool insensitive) {
	enum { RANK_NONE, RANK_CONTAINS, RANK_PREFIX };

	const char* texts[2] = {text1, text2};
	int rank[2] = {RANK_NONE, RANK_NONE};

	for(int i = 0; i < 2; i++) {
		if(texts[i] == NULL || filter == NULL) {
			continue;
		}
		const char* hit = insensitive ? strcasestr(texts[i], filter) : strstr(texts[i], filter);
		if(hit == texts[i]) {
			rank[i] = RANK_PREFIX;
		} else if(hit != NULL) {
			rank[i] = RANK_CONTAINS;
		}
	}

	// sign of (rank of text2 - rank of text1): the better rank sorts first
	return (rank[1] > rank[0]) - (rank[1] < rank[0]);
}
|
||||
|
||||
int sort_for_matching_mode(const char* text1, const char* text2, int fallback,
|
||||
enum matching_mode match_type, const char* filter, bool insensitive) {
|
||||
int primary = 0;
|
||||
switch(match_type) {
|
||||
case MATCHING_MODE_MULTI_CONTAINS:
|
||||
primary = multi_contains_sort(text1, text2, filter, insensitive);
|
||||
break;
|
||||
case MATCHING_MODE_CONTAINS:
|
||||
primary = contains_sort(text1, text2, filter, insensitive);
|
||||
break;
|
||||
case MATCHING_MODE_FUZZY:
|
||||
primary = fuzzy_sort(text1, text2, filter, insensitive);
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
if(primary == 0) {
|
||||
return fallback;
|
||||
}
|
||||
return primary;
|
||||
}
|
||||
// end sorting
|
||||
|
175
src/utils.c
175
src/utils.c
@ -17,11 +17,9 @@
|
||||
|
||||
#include <utils.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <libgen.h>
|
||||
#include <math.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -111,179 +109,6 @@ size_t utils_distance(const char* haystack, const char* needle) {
|
||||
return arr[str1_len][str2_len];
|
||||
}
|
||||
|
||||
// leading gap
|
||||
#define SCORE_GAP_LEADING -0.005
|
||||
// trailing gap
|
||||
#define SCORE_GAP_TRAILING -0.005
|
||||
// gap in the middle
|
||||
#define SCORE_GAP_INNER -0.01
|
||||
// we matched the characters consecutively
|
||||
#define SCORE_MATCH_CONSECUTIVE 1.0
|
||||
// we got a consecutive match, but insensitive is on
|
||||
// and we didn't match the case.
|
||||
#define SCORE_MATCH_NOT_MATCH_CASE 0.9
|
||||
// we are matching after a slash
|
||||
#define SCORE_MATCH_SLASH 0.9
|
||||
// we are matching after a space dash or hyphen
|
||||
#define SCORE_MATCH_WORD 0.8
|
||||
// we are matching a camel case letter
|
||||
#define SCORE_MATCH_CAPITAL 0.7
|
||||
// we are matching after a dot
|
||||
#define SCORE_MATCH_DOT 0.6
|
||||
|
||||
#define SWAP(x, y, T) \
|
||||
do { \
|
||||
T SWAP = x; \
|
||||
x = y; \
|
||||
y = SWAP; \
|
||||
} while (0)
|
||||
|
||||
#define max(a, b) (((a) > (b)) ? (a) : (b))
|
||||
|
||||
static void precompute_bonus(const char *haystack, score_t *match_bonus) {
|
||||
/* Which positions are beginning of words */
|
||||
int m = strlen(haystack);
|
||||
char last_ch = '\0';
|
||||
for (int i = 0; i < m; i++) {
|
||||
char ch = haystack[i];
|
||||
|
||||
score_t score = 0;
|
||||
if (isalnum(ch)) {
|
||||
if (!last_ch || last_ch == '/') {
|
||||
score = SCORE_MATCH_SLASH;
|
||||
} else if (last_ch == '-' || last_ch == '_' ||
|
||||
last_ch == ' ') {
|
||||
score = SCORE_MATCH_WORD;
|
||||
} else if (last_ch >= 'a' && last_ch <= 'z' &&
|
||||
ch >= 'A' && ch <= 'Z') {
|
||||
/* CamelCase */
|
||||
score = SCORE_MATCH_CAPITAL;
|
||||
} else if (last_ch == '.') {
|
||||
score = SCORE_MATCH_DOT;
|
||||
}
|
||||
}
|
||||
|
||||
match_bonus[i] = score;
|
||||
last_ch = ch;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool match_with_case(char a, char b, bool insensitive) {
|
||||
if(insensitive) {
|
||||
return tolower(a) == tolower(b);
|
||||
} else {
|
||||
return a == b;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void match_row(int row, score_t* curr_D, score_t* curr_M,
|
||||
const score_t* last_D, const score_t * last_M,
|
||||
const char* needle, const char* haystack, int n, int m, score_t* match_bonus) {
|
||||
int i = row;
|
||||
|
||||
score_t prev_score = SCORE_MIN;
|
||||
score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER;
|
||||
|
||||
for (int j = 0; j < m; j++) {
|
||||
if (match_with_case(needle[i], haystack[j], true)) {
|
||||
score_t score = SCORE_MIN;
|
||||
if (!i) {
|
||||
// first line we fill in a row for non-matching
|
||||
score = (j * SCORE_GAP_LEADING) + match_bonus[j];
|
||||
} else if (j) { /* i > 0 && j > 0*/
|
||||
// we definitely match case insensitively already so if
|
||||
// our character isn't the same then we have a
|
||||
// different case
|
||||
score_t consecutive_bonus = needle[i] == haystack[j] ? SCORE_MATCH_CONSECUTIVE : SCORE_MATCH_NOT_MATCH_CASE;
|
||||
|
||||
score = max(last_M[j - 1] + match_bonus[j],
|
||||
/* consecutive match, doesn't stack
|
||||
with match_bonus */
|
||||
last_D[j - 1] + consecutive_bonus);
|
||||
}
|
||||
curr_D[j] = score;
|
||||
curr_M[j] = prev_score = max(score, prev_score + gap_score);
|
||||
} else {
|
||||
curr_D[j] = SCORE_MIN;
|
||||
curr_M[j] = prev_score = prev_score + gap_score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fuzzy matching scoring. Adapted from
|
||||
// https://github.com/jhawthorn/fzy/blob/master/src/match.c and
|
||||
// https://github.com/jhawthorn/fzy/blob/master/ALGORITHM.md
|
||||
// For a fuzzy match string needle being searched for in haystack we provide a
|
||||
// number score for how well we match.
|
||||
// We create two matrices of size needle_len (n) by haystack_len (m).
|
||||
// The first matrix is the score matrix. Each position (i,j) within this matrix
|
||||
// consists of the score that corresponds to the score that would be generated
|
||||
// by matching the first i characters of the needle with the first j
|
||||
// characters of the haystack. Gaps have a fixed penalty for having a gap along
|
||||
// with a linear penalty for gap size (c.f. gotoh's algorithm).
|
||||
// matches give a positive score, with a slight weight given to matches after
|
||||
// certain special characters (i.e. the first character after a `/` will be
|
||||
// "almost" consecutive but lower than an actual consecutive match).
|
||||
// Our second matrix is our diagonal matrix where we store the best match
|
||||
// that ends at a match. This allows us to calculate our gap penalties alongside
|
||||
// our consecutive match scores.
|
||||
// In addition, since we only rely on the current, and previous row of the
|
||||
// matrices and we only want to compute the score, we only store those scores
|
||||
// and reuse the previous rows (rather than storing the entire (n*m) matrix).
|
||||
// In addition we've simplified some of the algorithm compared to fzy to
|
||||
// improve legibility. (Can reimplement lookup tables later if wanted.)
|
||||
// Also, the reference algorithm does not take into account case sensitivity
|
||||
// which has been implemented here.
|
||||
|
||||
|
||||
score_t utils_fuzzy_score(const char* haystack, const char* needle) {
|
||||
if(!*needle)
|
||||
return SCORE_MIN;
|
||||
|
||||
int n = strlen(needle);
|
||||
int m = strlen(haystack);
|
||||
score_t match_bonus[m];
|
||||
precompute_bonus(haystack, match_bonus);
|
||||
|
||||
if(m > MATCH_FUZZY_MAX_LEN || n > m) {
|
||||
/*
|
||||
* Unreasonably large candidate: return no score
|
||||
* If it is a valid match it will still be returned, it will
|
||||
* just be ranked below any reasonably sized candidates
|
||||
*/
|
||||
return SCORE_MIN;
|
||||
} else if(n == m) {
|
||||
/* Since this method can only be called with a haystack which
|
||||
* matches needle. If the lengths of the strings are equal the
|
||||
* strings themselves must also be equal (ignoring case).
|
||||
*/
|
||||
return SCORE_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* D[][] Stores the best score for this position ending with a match.
|
||||
* M[][] Stores the best possible score at this position.
|
||||
*/
|
||||
score_t D[2][MATCH_FUZZY_MAX_LEN], M[2][MATCH_FUZZY_MAX_LEN];
|
||||
|
||||
score_t* last_D, *last_M;
|
||||
score_t* curr_D, *curr_M;
|
||||
|
||||
last_D = D[0];
|
||||
last_M = M[0];
|
||||
curr_D = D[1];
|
||||
curr_M = M[1];
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
match_row(i, curr_D, curr_M, last_D, last_M, needle, haystack, n, m, match_bonus);
|
||||
|
||||
SWAP(curr_D, last_D, score_t *);
|
||||
SWAP(curr_M, last_M, score_t *);
|
||||
}
|
||||
|
||||
return last_M[m - 1];
|
||||
}
|
||||
|
||||
void utils_mkdir(char* path, mode_t mode) {
|
||||
if(access(path, F_OK) != 0) {
|
||||
char* tmp = strdup(path);
|
||||
|
264
src/wofi.c
264
src/wofi.c
@ -29,6 +29,7 @@
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <utils.h>
|
||||
#include <match.h>
|
||||
#include <config.h>
|
||||
#include <utils_g.h>
|
||||
#include <property_box.h>
|
||||
@ -41,16 +42,9 @@
|
||||
#include <gdk/gdkwayland.h>
|
||||
|
||||
#define PROTO_VERSION(v1, v2) (v1 < v2 ? v1 : v2)
|
||||
#define MAX_MULTI_CONTAINS_FILTER_SIZE 256
|
||||
|
||||
static const char* terminals[] = {"kitty", "termite", "alacritty", "foot", "gnome-terminal", "weston-terminal"};
|
||||
|
||||
enum matching_mode {
|
||||
MATCHING_MODE_CONTAINS,
|
||||
MATCHING_MODE_MULTI_CONTAINS,
|
||||
MATCHING_MODE_FUZZY
|
||||
};
|
||||
|
||||
enum location {
|
||||
LOCATION_CENTER,
|
||||
LOCATION_TOP_LEFT,
|
||||
@ -972,66 +966,6 @@ static void activate_search(GtkEntry* entry, gpointer data) {
|
||||
}
|
||||
}
|
||||
|
||||
static gboolean do_strcomp(gchar* filter, const gchar* text) {
|
||||
if(filter == NULL || strcmp(filter, "") == 0) {
|
||||
return TRUE;
|
||||
}
|
||||
if(text == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
if(insensitive) {
|
||||
return strcasestr(text, filter) != NULL;
|
||||
} else {
|
||||
return strstr(text, filter) != NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static char* strcasechr(const char *s, char c) {
|
||||
const char accept[3] = {c, toupper(c), 0};
|
||||
return strpbrk(s, accept);
|
||||
}
|
||||
|
||||
|
||||
static gboolean do_fuzzy_strcomp(gchar* filter, const gchar* text) {
|
||||
if (filter == NULL || strcmp(filter, "") == 0) {
|
||||
return TRUE;
|
||||
}
|
||||
if (text == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
// we just check that all the characters (ignoring case) are in the
|
||||
// search text possibly case insensitively in the correct order
|
||||
while (*filter) {
|
||||
char nch = *filter++;
|
||||
|
||||
if (!(text = strcasechr(text, nch))) {
|
||||
return FALSE;
|
||||
}
|
||||
text++;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gboolean do_multi_strcomp(gchar* filter, const gchar* text) {
|
||||
if(filter == NULL || strcmp(filter, "") == 0) {
|
||||
return TRUE;
|
||||
}
|
||||
if(text == NULL) {
|
||||
return FALSE;
|
||||
}
|
||||
gchar new_filter[MAX_MULTI_CONTAINS_FILTER_SIZE];
|
||||
strncpy(new_filter, filter, sizeof(new_filter));
|
||||
new_filter[sizeof(new_filter) - 1] = '\0';
|
||||
gchar* token;
|
||||
gchar* rest = new_filter;
|
||||
while((token = strtok_r(rest, " ", &rest))) {
|
||||
if(do_strcomp(token, text) == FALSE) {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static gboolean filter_proxy(GtkFlowBoxChild* row) {
|
||||
GtkWidget* box = gtk_bin_get_child(GTK_BIN(row));
|
||||
if(GTK_IS_EXPANDER(box)) {
|
||||
@ -1039,40 +973,20 @@ static gboolean filter_proxy(GtkFlowBoxChild* row) {
|
||||
}
|
||||
const gchar* text =
|
||||
wofi_property_box_get_property(WOFI_PROPERTY_BOX(box), "filter");
|
||||
return do_strcomp(filter, text);
|
||||
}
|
||||
|
||||
static gboolean filter_multi_proxy(GtkFlowBoxChild* row) {
|
||||
GtkWidget* box = gtk_bin_get_child(GTK_BIN(row));
|
||||
if(GTK_IS_EXPANDER(box)) {
|
||||
box = gtk_expander_get_label_widget(GTK_EXPANDER(box));
|
||||
}
|
||||
const gchar* text =
|
||||
wofi_property_box_get_property(WOFI_PROPERTY_BOX(box), "filter");
|
||||
return do_multi_strcomp(filter, text);
|
||||
}
|
||||
|
||||
static gboolean filter_fuzzy_proxy(GtkFlowBoxChild *row) {
|
||||
GtkWidget *box = gtk_bin_get_child(GTK_BIN(row));
|
||||
if (GTK_IS_EXPANDER(box)) {
|
||||
box = gtk_expander_get_label_widget(GTK_EXPANDER(box));
|
||||
}
|
||||
const gchar *text =
|
||||
wofi_property_box_get_property(WOFI_PROPERTY_BOX(box), "filter");
|
||||
return do_fuzzy_strcomp(filter, text);
|
||||
return match_for_matching_mode(filter, text, matching, insensitive);
|
||||
}
|
||||
|
||||
static void do_resize_surface_after_filter(GtkFlowBoxChild *row, gboolean filter_return) {
|
||||
|
||||
if (gtk_widget_get_visible(GTK_WIDGET(row)) == !filter_return &&
|
||||
if(gtk_widget_get_visible(GTK_WIDGET(row)) == !filter_return &&
|
||||
dynamic_lines) {
|
||||
if (filter_return) {
|
||||
if(filter_return) {
|
||||
++line_count;
|
||||
} else {
|
||||
--line_count;
|
||||
}
|
||||
|
||||
if (line_count < max_lines) {
|
||||
if(line_count < max_lines) {
|
||||
lines = line_count;
|
||||
update_surface_size();
|
||||
} else {
|
||||
@ -1093,133 +1007,6 @@ static gboolean do_filter(GtkFlowBoxChild* row, gpointer data) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gboolean do_fuzzy_filter(GtkFlowBoxChild* row, gpointer data) {
|
||||
(void)data;
|
||||
gboolean ret = filter_fuzzy_proxy(row);
|
||||
|
||||
do_resize_surface_after_filter(row, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gboolean do_multi_filter(GtkFlowBoxChild* row, gpointer data) {
|
||||
(void)data;
|
||||
gboolean ret = filter_multi_proxy(row);
|
||||
|
||||
do_resize_surface_after_filter(row, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static gint fuzzy_sort(const gchar *text1, const gchar *text2) {
|
||||
gboolean match1 = do_fuzzy_strcomp(filter, text1);
|
||||
gboolean match2 = do_fuzzy_strcomp(filter, text2);
|
||||
// both filters match do fuzzy scoring
|
||||
if(match1 && match2) {
|
||||
score_t dist1 = utils_fuzzy_score(text1, filter);
|
||||
score_t dist2 = utils_fuzzy_score(text2, filter);
|
||||
if (dist1 == dist2) {
|
||||
// same same
|
||||
return 0;
|
||||
} else if (dist1 > dist2) { // highest score wins.
|
||||
// text1 goes first
|
||||
return -1;
|
||||
} else {
|
||||
// text2 goes first
|
||||
return 1;
|
||||
}
|
||||
} else if(match1) {
|
||||
// text1 goes first
|
||||
return -1;
|
||||
} else if(match2) {
|
||||
// text2 goes first
|
||||
return 1;
|
||||
} else {
|
||||
// same same.
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// we sort based on how early in the string all the matches are.
|
||||
// if there are matches for each.
|
||||
static gint multi_contains_sort(const gchar* text1, const gchar* text2) {
|
||||
// sum of string positions of each match
|
||||
int t1_count = 0;
|
||||
int t2_count = 0;
|
||||
// does this string match with mult-contains
|
||||
bool t1_match = true;
|
||||
bool t2_match = true;
|
||||
|
||||
gchar new_filter[MAX_MULTI_CONTAINS_FILTER_SIZE];
|
||||
strncpy(new_filter, filter, sizeof(new_filter));
|
||||
new_filter[sizeof(new_filter) - 1] = '\0';
|
||||
|
||||
gchar* token;
|
||||
gchar* rest = new_filter;
|
||||
while((token = strtok_r(rest, " ", &rest))) {
|
||||
char* str1, *str2;
|
||||
if(insensitive) {
|
||||
str1 = strcasestr(text1, token);
|
||||
str2 = strcasestr(text2, token);
|
||||
} else {
|
||||
str1 = strstr(text1, token);
|
||||
str2 = strstr(text2, token);
|
||||
}
|
||||
t1_match = t1_match && str1 != NULL;
|
||||
t2_match = t2_match && str2 != NULL;
|
||||
if(str1 != NULL) {
|
||||
int pos1 = str1 - text1;
|
||||
t1_count += pos1;
|
||||
}
|
||||
if(str2 != NULL) {
|
||||
int pos2 = str2 - text2;
|
||||
t2_count += pos2;
|
||||
}
|
||||
}
|
||||
if(t1_match && t2_match) {
|
||||
// both match
|
||||
// return the one with the smallest count.
|
||||
return t1_count - t2_count;
|
||||
} else if(t1_match) {
|
||||
return -1;
|
||||
} else if(t2_match) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
static gint contains_sort(const gchar* text1, const gchar* text2) {
|
||||
char* str1, *str2;
|
||||
|
||||
if(insensitive) {
|
||||
str1 = strcasestr(text1, filter);
|
||||
str2 = strcasestr(text2, filter);
|
||||
} else {
|
||||
str1 = strstr(text1, filter);
|
||||
str2 = strstr(text2, filter);
|
||||
}
|
||||
bool tx1 = str1 == text1;
|
||||
bool tx2 = str2 == text2;
|
||||
bool txc1 = str1 != NULL;
|
||||
bool txc2 = str2 != NULL;
|
||||
|
||||
if(tx1 && tx2) {
|
||||
return 0;
|
||||
} else if(tx1) {
|
||||
return -1;
|
||||
} else if(tx2) {
|
||||
return 1;
|
||||
} else if(txc1 && txc2) {
|
||||
return 0;
|
||||
} else if(txc1) {
|
||||
return -1;
|
||||
} else if(txc2) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static gint do_sort(GtkFlowBoxChild* child1, GtkFlowBoxChild* child2, gpointer data) {
|
||||
(void) data;
|
||||
gtk_flow_box_get_child_at_index(GTK_FLOW_BOX(inner_box), 0);
|
||||
@ -1242,7 +1029,7 @@ static gint do_sort(GtkFlowBoxChild* child1, GtkFlowBoxChild* child2, gpointer d
|
||||
return index1 - index2;
|
||||
}
|
||||
|
||||
uint64_t fallback = 0;
|
||||
int fallback = 0;
|
||||
switch(sort_order) {
|
||||
case SORT_ORDER_DEFAULT:
|
||||
fallback = index1 - index2;
|
||||
@ -1259,30 +1046,7 @@ static gint do_sort(GtkFlowBoxChild* child1, GtkFlowBoxChild* child2, gpointer d
|
||||
if(filter == NULL || strcmp(filter, "") == 0) {
|
||||
return fallback;
|
||||
}
|
||||
|
||||
gint primary = 0;
|
||||
switch(matching) {
|
||||
case MATCHING_MODE_MULTI_CONTAINS:
|
||||
primary = multi_contains_sort(text1, text2);
|
||||
if(primary == 0) {
|
||||
return fallback;
|
||||
}
|
||||
return primary;
|
||||
case MATCHING_MODE_CONTAINS:
|
||||
primary = contains_sort(text1, text2);
|
||||
if(primary == 0) {
|
||||
return fallback;
|
||||
}
|
||||
return primary;
|
||||
case MATCHING_MODE_FUZZY:
|
||||
primary = fuzzy_sort(text1, text2);
|
||||
if(primary == 0) {
|
||||
return fallback;
|
||||
}
|
||||
return primary;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
return sort_for_matching_mode(text1, text2, fallback, matching, filter, insensitive);
|
||||
}
|
||||
|
||||
static void select_first(void) {
|
||||
@ -2035,22 +1799,8 @@ void wofi_init(struct map* _config) {
|
||||
gtk_container_add(GTK_CONTAINER(wrapper_box), inner_box);
|
||||
gtk_container_add(GTK_CONTAINER(scroll), wrapper_box);
|
||||
|
||||
switch(matching) {
|
||||
case MATCHING_MODE_MULTI_CONTAINS:
|
||||
gtk_flow_box_set_filter_func(GTK_FLOW_BOX(inner_box), do_multi_filter, NULL, NULL);
|
||||
gtk_flow_box_set_sort_func(GTK_FLOW_BOX(inner_box), do_sort, NULL, NULL);
|
||||
break;
|
||||
case MATCHING_MODE_CONTAINS:
|
||||
gtk_flow_box_set_filter_func(GTK_FLOW_BOX(inner_box), do_filter, NULL, NULL);
|
||||
gtk_flow_box_set_sort_func(GTK_FLOW_BOX(inner_box), do_sort, NULL, NULL);
|
||||
break;
|
||||
case MATCHING_MODE_FUZZY:
|
||||
gtk_flow_box_set_filter_func(GTK_FLOW_BOX(inner_box), do_fuzzy_filter, NULL,
|
||||
NULL);
|
||||
gtk_flow_box_set_sort_func(GTK_FLOW_BOX(inner_box), do_sort, NULL,
|
||||
NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
g_signal_connect(inner_box, "child-activated", G_CALLBACK(activate_item), NULL);
|
||||
g_signal_connect(inner_box, "selected-children-changed", G_CALLBACK(select_item), NULL);
|
||||
|
Loading…
Reference in New Issue
Block a user