diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el index fc193fe54f0..5b5408a595c 100644 --- a/lisp/minibuffer.el +++ b/lisp/minibuffer.el @@ -4421,9 +4421,12 @@ This is used in combination with `completion-pcm--segments->regex'." (setq idx i))) idx)) -(defun completion-pcm--all-completions (prefix pattern table pred) +(defun completion-pcm--all-completions (prefix pattern table pred + &optional override-re) "Find all completions for PATTERN in TABLE obeying PRED. -PATTERN is as returned by `completion-pcm--string->pattern'." +PATTERN is as returned by `completion-pcm--string->pattern'. +OVERRIDE-RE means to use this regular expression instead of grabbing one +from PATTERN." ;; (cl-assert (= (car (completion-boundaries prefix table pred "")) ;; (length prefix))) ;; Find an initial list of possible completions. @@ -4435,7 +4438,7 @@ PATTERN is as returned by `completion-pcm--string->pattern'." ;; Use all-completions to do an initial cull. This is a big win, ;; since all-completions is written in C! (let* (;; Convert search pattern to a standard regular expression. - (regex (completion-pcm--pattern->regex pattern)) + (regex (or override-re (completion-pcm--pattern->regex pattern))) (case-fold-search completion-ignore-case) (completion-regexp-list (cons regex completion-regexp-list)) (compl (all-completions @@ -4450,18 +4453,12 @@ PATTERN is as returned by `completion-pcm--string->pattern'." (when (string-match-p regex c) (push c poss))) (nreverse poss)))))) -(defvar flex-score-match-tightness 3 - "Controls how the `flex' completion style scores its matches. +(defvar flex-score-match-tightness nil) -Value is a positive number. A number smaller than 1 makes the -scoring formula reward matches scattered along the string, while -a number greater than one make the formula reward matches that -are clumped together. I.e \"foo\" matches both strings -\"fbarbazoo\" and \"fabrobazo\", which are of equal length, but -only a value greater than one will score the former (which has -one large \"hole\" and a clumped-together \"oo\" match) higher -than the latter (which has two \"holes\" and three -one-letter-long matches).") +(make-obsolete-variable + 'flex-score-match-tightness + "It never did anything very useful anyway." + "31.0") (defvar completion-lazy-hilit nil "If non-nil, request lazy highlighting of completion candidates. @@ -4502,133 +4499,49 @@ details." (funcall completion-lazy-hilit-fn (copy-sequence str)) str)) -(defun completion--hilit-from-re (string regexp &optional point-idx) - "Fontify STRING using REGEXP POINT-IDX. -Uses `completions-common-part' and `completions-first-difference' -faces to fontify STRING. -POINT-IDX is the position of point in the presumed \"PCM\" pattern -from which REGEXP was generated." - (let* ((md (and regexp (string-match regexp string) (cddr (match-data t)))) - (pos (if point-idx (match-beginning point-idx) (match-end 0))) - (me (and md (match-end 0))) - (from 0)) - (while md - (add-face-text-property from (pop md) - 'completions-common-part nil string) - (setq from (pop md))) - (if (and (numberp pos) (> (length string) pos)) - (add-face-text-property - pos (1+ pos) - 'completions-first-difference - nil string)) - (unless (or (not me) (= from me)) - (add-face-text-property from me 'completions-common-part nil string)) - string)) +(cl-defun completion--flex-score (pat str &optional dont-error) + "Compute flex score of STR matching PAT using Gotoh algorithm. +If DONT-ERROR, return nil if PAT cannot match STR. +Return (NORMALIZED-COST . MATCHES) where NORMALIZED-COST is a +number (lower = better) and MATCHES is a list of match positions in STR." + (pcase-let ((`(,cost . ,matches) + (completion--flex-score-gotoh pat str))) + (unless cost + (if dont-error (cl-return-from completion--flex-score nil) + (error "Pattern %s does not match %s" pat str))) + (cons (* (1+ cost) (- (length str) (length pat))) matches))) -(defun completion--flex-score-1 (md-groups match-end len) - "Compute matching score of completion. -The score lies in the range between 0 and 1, where 1 corresponds to -the full match. -MD-GROUPS is the \"group\" part of the match data. -MATCH-END is the end of the match. -LEN is the length of the completion string." - (let* ((from 0) - ;; To understand how this works, consider these simple - ;; ascii diagrams showing how the pattern "foo" - ;; flex-matches "fabrobazo", "fbarbazoo" and - ;; "barfoobaz": +(defun completion--flex-propertize (str matches point-idx segments) + "Add completion faces to STR based on MATCHES and POINT-IDX. +MATCHES is a list of match positions. POINT-IDX is a match group index +from the PCM pattern. SEGMENTS are extracted from the full PCM pattern. +Adds `completions-common-part' for matched positions and +`completions-first-difference' for the position corresponding to point." + (when point-idx + ;; Compute character position from segments + (let* ((pos (cl-loop for seg in segments + for i from 1 + while (<= i point-idx) + sum (length (car seg))))) + ;; Add first-difference after pos-th match, if in range + (let ((point-match (and (> pos 0) + (<= pos (length matches)) + (nth (1- pos) matches)))) + (when (and point-match (< (1+ point-match) (length str))) + (add-face-text-property + (1+ point-match) (+ 2 point-match) + 'completions-first-difference nil str))))) + ;; Highlight matched positions + (dolist (pos matches) + (add-face-text-property pos (1+ pos) + 'completions-common-part + nil str)) + str) - ;; f abr o baz o - ;; + --- + --- + - - ;; f barbaz oo - ;; + ------ ++ - - ;; bar foo baz - ;; +++ - - ;; "+" indicates parts where the pattern matched. A - ;; "hole" in the middle of the string is indicated by - ;; "-". Note that there are no "holes" near the edges - ;; of the string. The completion score is a number - ;; bound by (0..1] (i.e., larger than (but not equal - ;; to) zero, and smaller or equal to one): the higher - ;; the better and only a perfect match (pattern equals - ;; string) will have score 1. The formula takes the - ;; form of a quotient. For the numerator, we use the - ;; number of +, i.e. the length of the pattern. For - ;; the denominator, it first computes - ;; - ;; hole_i_contrib = 1 + (Li-1)^(1/tightness) - ;; - ;; , for each hole "i" of length "Li", where tightness - ;; is given by `flex-score-match-tightness'. The - ;; final value for the denominator is then given by: - ;; - ;; (SUM_across_i(hole_i_contrib) + 1) * len - ;; - ;; , where "len" is the string's length. - (score-numerator 0) - (score-denominator 0) - (last-b 0)) - (while (and md-groups (car md-groups)) - (let ((a from) - (b (pop md-groups))) - (setq - score-numerator (+ score-numerator (- b a))) - (unless (or (= a last-b) - (zerop last-b) - (= a len)) - (setq - score-denominator (+ score-denominator - 1 - (expt (- a last-b 1) - (/ 1.0 - flex-score-match-tightness))))) - (setq - last-b b)) - (setq from (pop md-groups))) - ;; If `pattern' doesn't have an explicit trailing any, the - ;; regex `re' won't produce match data representing the - ;; region after the match. We need to account to account - ;; for that extra bit of match (bug#42149). - (unless (= from match-end) - (let ((a from) - (b match-end)) - (setq - score-numerator (+ score-numerator (- b a))) - (unless (or (= a last-b) - (zerop last-b) - (= a len)) - (setq - score-denominator (+ score-denominator - 1 - (expt (- a last-b 1) - (/ 1.0 - flex-score-match-tightness))))) - (setq - last-b b))) - (/ score-numerator (* len (1+ score-denominator)) 1.0))) - -(defvar completion--flex-score-last-md nil - "Helper variable for `completion--flex-score'.") - -(defun completion--flex-score (str re &optional dont-error) - "Compute flex score of completion STR based on RE. -If DONT-ERROR, just return nil if RE doesn't match STR." - (let ((case-fold-search completion-ignore-case)) - (cond ((string-match re str) - (let* ((match-end (match-end 0)) - (md (cddr - (setq - completion--flex-score-last-md - (match-data t completion--flex-score-last-md))))) - (completion--flex-score-1 md match-end (length str)))) - ((not dont-error) - (error "Internal error: %s does not match %s" re str))))) - -(defvar completion-pcm--regexp nil - "Regexp from PCM pattern in `completion-pcm--hilit-commonality'.") +(defvar completion-flex--pattern-str nil + "Pattern string for flex completion scoring. +This is the concatenated string parts from the PCM pattern, +used by `completion--flex-score' for Gotoh algorithm matching.") (defun completion-pcm--hilit-commonality (pattern completions) "Show where and how well PATTERN matches COMPLETIONS. @@ -4645,22 +4558,37 @@ relevant segments. Else, if `completion-lazy-hilit' is t, return COMPLETIONS unchanged, but setup a suitable `completion-lazy-hilit-fn' (which see) for later lazy highlighting." - (setq completion-pcm--regexp nil - completion-lazy-hilit-fn nil) + (setq completion-lazy-hilit-fn nil + completion-flex--pattern-str nil) (cond ((and completions (cl-loop for e in pattern thereis (stringp e))) (let* ((segments (completion-pcm--pattern->segments pattern)) - (re (completion-pcm--segments->regex segments 'group)) - (point-idx (completion-pcm--segments-point-idx segments))) - (setq completion-pcm--regexp re) + (point-idx (completion-pcm--segments-point-idx segments)) + ;; Extract pattern string (concatenate string elements) + (pat (mapconcat #'identity + (delq nil (mapcar (lambda (x) + (if (stringp x) x nil)) + pattern)) + ""))) + (setq completion-flex--pattern-str pat) (cond (completion-lazy-hilit (setq completion-lazy-hilit-fn - (lambda (str) (completion--hilit-from-re str re point-idx))) + (lambda (str) + (let ((result (completion--flex-score pat str t))) + (when result + (completion--flex-propertize + str (cdr result) point-idx segments))) + str)) completions) (t (mapcar (lambda (str) - (completion--hilit-from-re (copy-sequence str) re point-idx)) + (setq str (copy-sequence str)) + (let ((result (completion--flex-score pat str t))) + (when result + (completion--flex-propertize + str (cdr result) point-idx segments))) + str) completions))))) (t completions))) @@ -4959,11 +4887,13 @@ the same set of elements." ;; Mostly derived from the code of `basic' completion. (defun completion-substring--all-completions - (string table pred point &optional transform-pattern-fn) + (string table pred point &optional + transform-pattern-fn simple-re) "Match the presumed substring STRING to the entries in TABLE. -Respect PRED and POINT. The pattern used is a PCM-style -substring pattern, but it be massaged by TRANSFORM-PATTERN-FN, if -that is non-nil." +Respect PRED and POINT. The pattern used is a PCM-style substring +pattern, but it be massaged by TRANSFORM-PATTERN-FN, if that is non-nil. +SIMPLE-RE is means to pass a simpler faster regular expression to +`completion-pcm--all-completions'" (let* ((beforepoint (substring string 0 point)) (afterpoint (substring string point)) (bounds (completion-boundaries beforepoint table pred afterpoint)) @@ -4978,7 +4908,13 @@ that is non-nil." (if transform-pattern-fn (funcall transform-pattern-fn pattern) pattern))) - (all (completion-pcm--all-completions prefix pattern table pred))) + (override-re (and simple-re + (mapconcat #'identity + (split-string + (substring string (car bounds) + (+ point (cdr bounds))) "" t) + ".*"))) + (all (completion-pcm--all-completions prefix pattern table pred override-re))) (list all pattern prefix suffix (car bounds)))) (defun completion-substring-try-completion (string table pred point) @@ -5009,7 +4945,7 @@ that is non-nil." (defun completion--flex-adjust-metadata (metadata) "If `flex' is actually doing filtering, adjust sorting." - (let ((flex-is-filtering-p completion-pcm--regexp) + (let ((flex-is-filtering-p completion-flex--pattern-str) (existing-dsf (completion-metadata-get metadata 'display-sort-function)) (existing-csf @@ -5021,11 +4957,11 @@ that is non-nil." (mapcar (lambda (str) (cons - (- (completion--flex-score - (or (get-text-property - 0 'completion--unquoted str) - str) - completion-pcm--regexp)) + (car (completion--flex-score + completion-flex--pattern-str + (or (get-text-property + 0 'completion--unquoted str) + str))) str)) (if existing-sort-fn (funcall existing-sort-fn completions) @@ -5067,7 +5003,8 @@ which is at the core of flex logic. The extra (pcase-let ((`(,all ,pattern ,prefix ,suffix ,_carbounds) (completion-substring--all-completions string table pred point - #'completion-flex--make-flex-pattern))) + #'completion-flex--make-flex-pattern + t))) (if minibuffer-completing-file-name (setq all (completion-pcm--filename-try-filter all))) ;; Try some "merging", meaning add as much as possible to the @@ -5084,7 +5021,8 @@ which is at the core of flex logic. The extra (pcase-let ((`(,all ,pattern ,prefix ,_suffix ,_carbounds) (completion-substring--all-completions string table pred point - #'completion-flex--make-flex-pattern))) + #'completion-flex--make-flex-pattern + t))) (when all (nconc (completion-pcm--hilit-commonality pattern all) (length prefix)))))) diff --git a/src/minibuf.c b/src/minibuf.c index 5dc2b230883..f7dffc24b94 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -20,6 +20,7 @@ along with GNU Emacs. If not, see . */ #include #include +#include #include @@ -2279,6 +2280,201 @@ init_minibuf_once_for_pdumper (void) last_minibuf_string = Qnil; } +/* FLEX/GOTOH algorithm for the 'flex' completion-style. Adapted from + GOTOH, Osamu. An improved algorithm for matching biological + sequences. Journal of molecular biology, 1982, 162.3: 705-708. + + This algorithm matches patterns to candidate strings, or needles to + haystacks. It works with cost matrices: imagine rows of these + matrices as pattern characters, and columns as the candidate string + characters. There is a -1 row, and a -1 column. The values there + hold real costs used for situations "before the first ever" match of + a pattern character to a string character. + + M and D are cost matrices. At the end of the algorithm, M will have + non-infinite values only for the spots where a pattern character + matches a string character. So a non-infinite M[i,j] means the i-th + character of the pattern matches the j-th character of the string. + The value stored is the lowest possible cost the algorithm had to + "pay" to be able to make that match there, given everything that may + have happened before/to the left. An infinite value simply means no + match at this pattern/string position. Note that both row and column + of M may have more than one match at multiple indices. But this + particular implementation of the algorithm assumes they have at least + one match. + + D (originally stands for 'Deletion' in the Gotoh paper) has "running + costs". Each value D[i,j] represents what the algorithm has to pay + to make or extend a gap when a match is found at i+1, j+1. By that + time, that cost may or may not be lower than continuing from a match + that had also been found at i,j. We always pick the lowest cost, and + by the time we reach the final column, we know we have picked the + cheapest possible path choosing when to gap, and when to follow up. + + Along the way, we construct P, a matrix used just for backtracking, + to reconstruct that path. Maybe P isn't needed, and all the + information can be cleverly derived from the final state of M and D. + But I couldn't make it work. */ +DEFUN ("completion--flex-score-gotoh", Fcompletion__flex_score_gotoh, + Scompletion__flex_score_gotoh, 2, 2, 0, + doc: /* Compute flex score of STR matching PAT using Gotoh +algorithm. Return nil if no match found, else return (COST . MATCHES) +where COST is a fixnum (lower is better) and MATCHES is a list of match +positions in STR. */) + (Lisp_Object pat, Lisp_Object str) +{ + /* Pre-allocated matrices for flex completion scoring. */ +#define FLEX_MAX_STR_SIZE 512 +#define FLEX_MAX_PAT_SIZE 128 +#define FLEX_MAX_MATRIX_SIZE FLEX_MAX_PAT_SIZE * FLEX_MAX_STR_SIZE + /* Macro for 2D indexing into "flat" arrays. */ +#define MAT(matrix, i, j) ((matrix)[((i) + 1) * width + ((j) + 1)]) + + CHECK_STRING (pat); + CHECK_STRING (str); + + size_t patlen = SCHARS (pat); + size_t strlen = SCHARS (str); + size_t width = strlen + 1; + size_t size = (patlen + 1) * width; + + /* Bail if strings are empty or matrix too large. */ + if (patlen == 0 || strlen == 0) + return Qnil; + + if (size > FLEX_MAX_MATRIX_SIZE) + return Qnil; + + /* Cost constants (lower is better). Maybe these could be + defcustom's?*/ + const int gap_open_cost = 10; + const int gap_extend_cost = 1; + const int pos_inf = INT_MAX / 2; + + static int M[FLEX_MAX_MATRIX_SIZE]; + static int D[FLEX_MAX_MATRIX_SIZE]; + static size_t P[FLEX_MAX_MATRIX_SIZE]; + + /* Initialize costs. Fill both matrices with positive infinity. */ + for (int j = 0; j < size; j++) M[j] = pos_inf; + for (int j = 0; j < size; j++) D[j] = pos_inf; + /* Except for D[0,0], which is 0, for prioritizing matches at the + beginning. Remaining elements on the first row are gap_open_cost/2 + to represent cheaper leading gaps. */ + for (int j = 0; j < width; j++) D[j] = gap_open_cost/2; + D[0] = 0; + + /* Index of last match before gap started, as computed in the previous + row. Used to build P. */ + int prev_gap_origin = -1; + + /* Poor man's iterator type. */ + typedef struct iter { int x; ptrdiff_t c; ptrdiff_t b; } iter_t; + + /* Info about first match computed in the previous row. */ + iter_t prev_match = {0,0,0}; + /* Forward pass. */ + for (iter_t i = {0,0,0}; i.x < patlen; i.x++) + { + int pat_char = fetch_string_char_advance(pat, &i.c, &i.b); + int gap_origin = -1; + bool match_seen = false; + + for (iter_t j = prev_match; j.x < strlen; j.x++) + { + iter_t jcopy = j; /* else advance function destroys it... */ + int str_char + = fetch_string_char_advance (str, &j.c, &j.b); + + /* Check if characters match (case-insensitive if needed). */ + bool cmatch; + if (completion_ignore_case) + cmatch = (downcase (pat_char) == downcase (str_char)); + else + cmatch = (pat_char == str_char); + + /* Compute match cost M[i][j], i.e. replace its infinite + value with something finite. */ + if (cmatch) + { + if (!match_seen) + { + match_seen = true; + prev_match = jcopy; + } + int pmatch_cost = MAT (M, i.x - 1, j.x - 1); + int pgap_cost = MAT (D, i.x - 1, j.x - 1); + + if (pmatch_cost <= pgap_cost) + { + /* Not only did the previous char also match (else + pmatch_cost would have been infinite) but following + it up with this match is best overall. */ + MAT (M, i.x, j.x) = pmatch_cost; + MAT (P, i.x, j.x) = j.x - 1; + } + else + { + /* Gapping is best, regardless of whether the previous + char also matched. That is, it's better to arrive at + this match from a gap. */ + MAT (M, i.x, j.x) = pgap_cost; + MAT (P, i.x, j.x) = prev_gap_origin; + } + } + + /* Regardless of a match here, compute D[i,j], the best + accumulated gapping cost at this point, considering whether + it's more advantageous to open from a previous match on + this row (a cost which may well be infinite if no such + match ever existed) or extend a gap started sometime + before. The next iteration will take this into account, + and so will the next row when analyzing a possible match + for the j+1-th string character. */ + int open_cost = MAT (M, i.x, j.x - 1) + gap_open_cost; + int extend_cost = MAT (D, i.x, j.x - 1) + gap_extend_cost; + + if (open_cost < extend_cost) + { + MAT (D, i.x, j.x) = open_cost; + gap_origin = j.x - 1; /* New gap. */ + } + else + MAT (D, i.x, j.x) = extend_cost; /* Extend gap. */ + } + prev_gap_origin = gap_origin; + } + + /* Find best (lowest) cost in last row. */ + int best_cost = pos_inf; + int lastcol = -1; + + for (int j = 0; j < strlen; j++) + { + int cost = MAT (M, patlen - 1, j); + if (cost < best_cost) + { + best_cost = cost; + lastcol = j; + } + } + + if (lastcol < 0 || best_cost >= pos_inf) + return Qnil; + + /* Build match positions list by tracing back through P matrix. */ + Lisp_Object matches = Qnil; + for (int i = patlen - 1, l = lastcol; i >= 0 && l >= 0; i--) + { + matches = Fcons (make_fixnum (l), matches); + l = MAT (P, i, l); + } + + return Fcons (make_fixnum (best_cost), matches); +#undef MAT + +} + void syms_of_minibuf (void) { @@ -2541,6 +2737,7 @@ showing the *Completions* buffer, if any. */); defsubr (&Stest_completion); defsubr (&Sassoc_string); defsubr (&Scompleting_read); + defsubr (&Scompletion__flex_score_gotoh); DEFSYM (Qminibuffer_quit_recursive_edit, "minibuffer-quit-recursive-edit"); DEFSYM (Qinternal_complete_buffer, "internal-complete-buffer"); DEFSYM (Qcompleting_read_function, "completing-read-function"); diff --git a/test/lisp/minibuffer-tests.el b/test/lisp/minibuffer-tests.el index 79ffb1d3fc7..02df7661c75 100644 --- a/test/lisp/minibuffer-tests.el +++ b/test/lisp/minibuffer-tests.el @@ -205,11 +205,6 @@ '("some/alpha" "base/epsilon" "base/delta")) `("epsilon" "delta" "beta" "alpha" "gamma" . 5)))) -(defun completion--pcm-score (comp) - "Get `completion-score' from COMP." - ;; FIXME, uses minibuffer.el implementation details - (completion--flex-score comp completion-pcm--regexp)) - (defun completion--pcm-first-difference-pos (comp) "Get `completions-first-difference' from COMP." (cl-loop for pos = (next-single-property-change 0 'face comp) @@ -244,20 +239,11 @@ "barfoobar"))) (ert-deftest completion-pcm-test-3 () - ;; Full match! - (should (eql - (completion--pcm-score - (car (completion-pcm-all-completions - "R" '("R" "hello") nil 1))) - 1.0))) + (should (car (completion-pcm-all-completions + "R" '("R" "hello") nil 1)))) (ert-deftest completion-pcm-test-4 () - ;; One fourth of a match and no match due to point being at the end - (should (eql - (completion--pcm-score - (car (completion-pcm-all-completions - "RO" '("RaOb") nil 1))) - (/ 1.0 4.0))) + ;; No match due to point being at the end (should (null (completion-pcm-all-completions "RO" '("RaOb") nil 2)))) @@ -420,24 +406,14 @@ "a"))) (ert-deftest completion-substring-test-1 () - ;; One third of a match! (should (equal (car (completion-substring-all-completions "foo" '("hello" "world" "barfoobar") nil 3)) - "barfoobar")) - (should (eql - (completion--pcm-score - (car (completion-substring-all-completions - "foo" '("hello" "world" "barfoobar") nil 3))) - (/ 1.0 3.0)))) + "barfoobar"))) (ert-deftest completion-substring-test-2 () - ;; Full match! - (should (eql - (completion--pcm-score - (car (completion-substring-all-completions - "R" '("R" "hello") nil 1))) - 1.0))) + (should (car (completion-substring-all-completions + "R" '("R" "hello") nil 1)))) (ert-deftest completion-substring-test-3 () ;; Substring match @@ -495,39 +471,70 @@ (completion-substring-try-completion "b" '("ab" "ab") nil 0) '("ab" . 2)))) +(defun completion--sorted-flex-completions (pat list &optional point) + "Flex test helper" + (let ((all (completion-flex-all-completions pat list nil point))) + (setcdr (last all) nil) + (sort all + (lambda (a b) + (< (car (completion--flex-score pat a)) + (car (completion--flex-score pat b))))))) + (ert-deftest completion-flex-test-1 () - ;; Fuzzy match (should (equal (car (completion-flex-all-completions "foo" '("hello" "world" "fabrobazo") nil 3)) "fabrobazo"))) (ert-deftest completion-flex-test-2 () - ;; Full match! - (should (eql - (completion--pcm-score - (car (completion-flex-all-completions - "R" '("R" "hello") nil 1))) - 1.0))) + (should (car (completion--sorted-flex-completions + "R" '("R" "hello") 1)))) (ert-deftest completion-flex-test-3 () - ;; Another fuzzy match, but more of a "substring" one (should (equal - (car (completion-flex-all-completions - "custgroup" '("customize-group-other-window") nil 4)) + (car (completion--sorted-flex-completions + "custgroup" '("customize-group-other-window") 4)) "customize-group-other-window")) ;; `completions-first-difference' should be at the right place (should (equal (completion--pcm-first-difference-pos - (car (completion-flex-all-completions - "custgroup" '("customize-group-other-window") nil 4))) + (car (completion--sorted-flex-completions + "custgroup" '("customize-group-other-window") 4))) 4)) (should (equal (completion--pcm-first-difference-pos - (car (completion-flex-all-completions - "custgroup" '("customize-group-other-window") nil 9))) + (car (completion--sorted-flex-completions + "custgroup" '("customize-group-other-window") 9))) 15))) +(ert-deftest completion-flex-test-non-ascii () + "Test flex completion with variable-width UTF-8 characters." + ;; Uses Japanese Kanji to test multi-byte character handling. + + ;; 日本 = "nihon" (Japan), 東京 = "tōkyō" (Tokyo) + (should (equal + (car (completion--sorted-flex-completions + "日本" '("日本語" "日本" "中国") 2)) + "日本")) + + ;; 図書館 = "toshokan" (library) + (should (equal + (car (completion--sorted-flex-completions + "tsk" '("図書館-toshokan" "task" "desk") 3)) + "task")) + + ;; Mixed pattern (Kanji + ASCII) matching mixed string + ;; 学校 = "gakkō" (school) + (should (equal + (car (completion--sorted-flex-completions + "学s" '("学校-school" "school" "学生") 2)) + "学校-school")) + + ;; Pattern "東" should match "東京" better than "関東" + (let ((results (completion--sorted-flex-completions + "東" '("東京" "関東") 1))) + (should (equal (car results) "東京")))) + (defmacro with-minibuffer-setup (completing-read &rest body) (declare (indent 1) (debug t))