From 97d2a659e8be716e5bb95c9803d9e867396b5699 Mon Sep 17 00:00:00 2001 From: Spencer Baugh Date: Fri, 24 Oct 2025 15:58:44 -0400 Subject: [PATCH] Optimize PCM regex to not contain adjacent wildcards When multiple wildcards occur in a PCM pattern, completion-pcm--pattern->regex previously would generate one instance of [^z-a]* for each of those wildcards, even if the wildcards were adjacent and could therefore be matched by a single [^z-a]*. This can make regex matching performance much worse. For example, with a minibuffer containing "*/" with point at the start, completion-pcm-all-completions would take several seconds to match in project-find-file in the Emacs repo. Now, we run completion-pcm--pattern->segments on the pattern first, which adds additional structure to the pattern, including consolidating adjacent regexes into a single sublist. Then completion-pcm--segments->regex generates a single regex wildcard for each of those pattern wildcards. As a consequence, we need to update the callers of completion-pcm--pattern->regex which pass non-nil GROUP. This provides a substantial performance improvement in various edge cases. * lisp/minibuffer.el (completion-pcm--pattern->segments) (completion-pcm--segments->regex): Add. (bug#79693) (completion-pcm--pattern->regex) (completion-pcm--merge-completions): Call pattern->segments and segments->regex. (completion-pcm--pattern-point-idx): Delete. (completion-pcm--segments-point-idx): Add. (completion-pcm--hilit-commonality): Call completion-pcm--segments-point-idx to find the submatch containing point. * test/lisp/minibuffer-tests.el (completion-pcm-test-5): Add more tests of highlighting the first difference. (completion-pcm-test-pattern->regex): Add tests showing the regex form of PCM patterns. --- lisp/minibuffer.el | 112 +++++++++++++++++++--------------- test/lisp/minibuffer-tests.el | 38 +++++++++++- 2 files changed, 100 insertions(+), 50 deletions(-) diff --git a/lisp/minibuffer.el b/lisp/minibuffer.el index bd20d757340..912c14a3667 100644 --- a/lisp/minibuffer.el +++ b/lisp/minibuffer.el @@ -4291,36 +4291,59 @@ or a symbol, see `completion-pcm--merge-completions'." (_ (push (pop p) n)))) (nreverse n))) -(defun completion-pcm--pattern->regex (pattern &optional group) - (let ((re - (concat "\\`" - (mapconcat - (lambda (x) - (cond - ((stringp x) (regexp-quote x)) - (t - (let ((re (if (eq x 'any-delim) - (concat completion-pcm--delim-wild-regex "*?") - "[^z-a]*?"))) - (if (if (consp group) (memq x group) group) - (concat "\\(" re "\\)") - re))))) - pattern - "")))) - ;; Avoid pathological backtracking. - (while (string-match "\\.\\*\\?\\(?:\\\\[()]\\)*\\(\\.\\*\\?\\)" re) - (setq re (replace-match "" t t re 1))) - re)) +(defun completion-pcm--pattern->segments (pattern) + "Segment PATTERN into more structured sublists. -(defun completion-pcm--pattern-point-idx (pattern) - "Return index of subgroup corresponding to `point' element of PATTERN. -Return nil if there's no such element." +Returns a list of lists which when concatenated is semantically the same +as PATTERN. + +The first element in each sublist is a (possibly empty) string. The +remaining elements in the sublist are all wildcard symbols. If PATTERN +ends with a wildcard, then each sublist is guaranteed to have at least +one wildcard." + (let (ret) + (while pattern + (let ((fixed "") + wildcards) + ;; Pop strings from PATTERN and concatenate them. + (while (stringp (car-safe pattern)) + (setq fixed (concat fixed (pop pattern)))) + ;; Pop wildcards from PATTERN. + (while (and pattern (symbolp (car-safe pattern))) + (push (pop pattern) wildcards)) + ;; The sublist is a fixed string followed by all the wildcards. + (push (cons fixed (nreverse wildcards)) ret))) + (nreverse ret))) + +(defun completion-pcm--segments->regex (segments &optional group) + (concat "\\`" + (mapconcat + (lambda (segment) + (concat + (regexp-quote (car segment)) + (when (cdr segment) + (concat + (when group "\\(") + (if (cl-every (lambda (x) (eq x 'any-delim)) (cdr segment)) + (concat completion-pcm--delim-wild-regex "*?") + "[^z-a]*?") + (when group "\\)"))))) + segments + ""))) + +(defun completion-pcm--pattern->regex (pattern &optional group) + (completion-pcm--segments->regex (completion-pcm--pattern->segments pattern) group)) + +(defun completion-pcm--segments-point-idx (segments) + "Return index of subgroup corresponding to `point' element of SEGMENTS. +Return nil if there's no such element. +This is used in combination with `completion-pcm--segments->regex'." (let ((idx nil) (i 0)) - (dolist (x pattern) - (unless (stringp x) - (incf i) - (if (eq x 'point) (setq idx i)))) + (dolist (x segments) + (incf i) + (when (memq 'point (cdr x)) + (setq idx i))) idx)) (defun completion-pcm--all-completions (prefix pattern table pred) @@ -4551,8 +4574,9 @@ see) for later lazy highlighting." completion-lazy-hilit-fn nil) (cond ((and completions (cl-loop for e in pattern thereis (stringp e))) - (let* ((re (completion-pcm--pattern->regex pattern 'group)) - (point-idx (completion-pcm--pattern-point-idx pattern))) + (let* ((segments (completion-pcm--pattern->segments pattern)) + (re (completion-pcm--segments->regex segments 'group)) + (point-idx (completion-pcm--segments-point-idx segments))) (setq completion-pcm--regexp re) (cond (completion-lazy-hilit (setq completion-lazy-hilit-fn @@ -4696,12 +4720,13 @@ the same set of elements." (cond ((null (cdr strs)) (list (car strs))) (t - (let ((re (completion-pcm--pattern->regex pattern 'group)) + (let ((segmented (completion-pcm--pattern->segments (append pattern '(any)))) (ccs ())) ;Chopped completions. ;; First chop each string into the parts corresponding to each ;; non-constant element of `pattern', using regexp-matching. - (let ((case-fold-search completion-ignore-case)) + (let ((re (concat (completion-pcm--segments->regex segmented t) "\\'")) + (case-fold-search completion-ignore-case)) (dolist (str strs) (unless (string-match re str) (error "Internal error: %s doesn't match %s" str re)) @@ -4713,24 +4738,15 @@ the same set of elements." (push (substring str last next) chopped) (setq last next) (setq i (1+ i))) - ;; Add the text corresponding to the implicit trailing `any'. - (push (substring str last) chopped) (push (nreverse chopped) ccs)))) ;; Then for each of those non-constant elements, extract the ;; commonality between them. - (let ((res ()) - (fixed "") - ;; Accumulate each stretch of wildcards, and process them as a unit. - (wildcards ())) - ;; Make the implicit trailing `any' explicit. - (dolist (elem (append pattern '(any))) - (if (stringp elem) - (progn - (setq fixed (concat fixed elem)) - (setq wildcards nil)) + (let ((res ())) + (dolist (elem segmented) + (let ((fixed (car elem)) + (wildcards (cdr elem))) (let ((comps ())) - (push elem wildcards) (dolist (cc (prog1 ccs (setq ccs nil))) (push (car cc) comps) (push (cdr cc) ccs)) @@ -4768,7 +4784,8 @@ the same set of elements." (push prefix res) ;; Push all the wildcards in this stretch, to preserve `point' and ;; `star' wildcards before ELEM. - (setq res (append wildcards res)) + (dolist (wildcard wildcards) + (push wildcard res)) ;; Extract common suffix additionally to common prefix. ;; Don't do it for `any' since it could lead to a merged ;; completion that doesn't itself match the candidates. @@ -4786,10 +4803,7 @@ the same set of elements." comps)))))) (cl-assert (stringp suffix)) (unless (equal suffix "") - (push suffix res)))) - ;; We pushed these wildcards on RES, so we're done with them. - (setq wildcards nil)) - (setq fixed ""))))) + (push suffix res))))))))) ;; We return it in reverse order. res))))) diff --git a/test/lisp/minibuffer-tests.el b/test/lisp/minibuffer-tests.el index c7b24a4928d..8114899c0ff 100644 --- a/test/lisp/minibuffer-tests.el +++ b/test/lisp/minibuffer-tests.el @@ -268,7 +268,29 @@ (should (null (completion--pcm-first-difference-pos (car (completion-pcm-all-completions - "f" '("few" "many") nil 0)))))) + "f" '("few" "many") nil 0))))) + (should (equal + (completion--pcm-first-difference-pos + (car (completion-pcm-all-completions + "a*" '("ab" "ac") nil 2))) + 1)) + (should (equal + (completion--pcm-first-difference-pos + (car (completion-pcm-all-completions + "a*" '("ab" "ac") nil 1))) + 1)) + (should (equal + (completion--pcm-first-difference-pos + (car (completion-pcm-all-completions + "a*x" '("abx" "acx") nil 2))) + 1)) + (should (equal + (completion--pcm-first-difference-pos + (car (completion-pcm-all-completions + "a*x" '("abxd" "acxe") nil 3))) + ;; FIXME: the highlighting should start at the 4th character + ;; rather than the third. + 3))) (ert-deftest completion-pcm-test-6 () ;; Wildcards and delimiters work @@ -339,6 +361,20 @@ "-x" '("-_.x" "-__x") nil 2) '("-_x" . 3)))) +(ert-deftest completion-pcm-test-pattern->regex () + (should (equal (completion-pcm--pattern->regex + '("A" any prefix "B" point "C")) + "\\`A[^z-a]*?B[^z-a]*?C")) + (should (equal (completion-pcm--pattern->regex + '(any any-delim prefix "A" "B" "C")) + "\\`[^z-a]*?ABC")) + (should (equal (completion-pcm--pattern->regex + '(any-delim "A" "B" star "C")) + "\\`[-_./:| *]*?AB[^z-a]*?C")) + (should (equal (completion-pcm--pattern->regex + '(any "A" any-delim "B" any-delim "C" any)) + "\\`[^z-a]*?A[-_./:| *]*?B[-_./:| *]*?C[^z-a]*?"))) + (ert-deftest completion-pcm-bug4219 () ;; With `completion-ignore-case', try-completion should change the ;; case of existing text when the completions have different casing.