Optimize PCM regex to not contain adjacent wildcards

When multiple wildcards occur in a PCM pattern,
completion-pcm--pattern->regex previously would generate one
instance of [^z-a]* for each of those wildcards, even if the
wildcards were adjacent and could therefore be matched by a
single [^z-a]*.  This can make regex matching performance much
worse.  For example, with a minibuffer containing "*/" with
point at the start, completion-pcm-all-completions would take
several seconds to match in project-find-file in the Emacs repo.

Now, we run completion-pcm--pattern->segments on the pattern
first, which adds additional structure to the pattern, including
consolidating adjacent regexes into a single sublist.  Then
completion-pcm--segments->regex generates a single regex
wildcard for each of those pattern wildcards.  As a consequence,
we need to update the callers of completion-pcm--pattern->regex
which pass non-nil GROUP.  This provides a substantial
performance improvement in various edge cases.

* lisp/minibuffer.el (completion-pcm--pattern->segments)
(completion-pcm--segments->regex): Add.  (bug#79693)
(completion-pcm--pattern->regex)
(completion-pcm--merge-completions): Call pattern->segments and
segments->regex.
(completion-pcm--pattern-point-idx): Delete.
(completion-pcm--segments-point-idx): Add.
(completion-pcm--hilit-commonality): Call
completion-pcm--segments-point-idx to find the submatch
containing point.
* test/lisp/minibuffer-tests.el (completion-pcm-test-5): Add
more tests of highlighting the first difference.
(completion-pcm-test-pattern->regex): Add tests showing the
regex form of PCM patterns.
This commit is contained in:
Spencer Baugh 2025-10-24 15:58:44 -04:00 committed by Juri Linkov
parent 1f00179df9
commit 97d2a659e8
2 changed files with 100 additions and 50 deletions

View file

@ -4291,36 +4291,59 @@ or a symbol, see `completion-pcm--merge-completions'."
(_ (push (pop p) n))))
(nreverse n)))
(defun completion-pcm--pattern->regex (pattern &optional group)
(let ((re
(concat "\\`"
(mapconcat
(lambda (x)
(cond
((stringp x) (regexp-quote x))
(t
(let ((re (if (eq x 'any-delim)
(concat completion-pcm--delim-wild-regex "*?")
"[^z-a]*?")))
(if (if (consp group) (memq x group) group)
(concat "\\(" re "\\)")
re)))))
pattern
""))))
;; Avoid pathological backtracking.
(while (string-match "\\.\\*\\?\\(?:\\\\[()]\\)*\\(\\.\\*\\?\\)" re)
(setq re (replace-match "" t t re 1)))
re))
(defun completion-pcm--pattern->segments (pattern)
"Segment PATTERN into more structured sublists.
(defun completion-pcm--pattern-point-idx (pattern)
"Return index of subgroup corresponding to `point' element of PATTERN.
Return nil if there's no such element."
Returns a list of lists which when concatenated is semantically the same
as PATTERN.
The first element in each sublist is a (possibly empty) string. The
remaining elements in the sublist are all wildcard symbols. If PATTERN
ends with a wildcard, then each sublist is guaranteed to have at least
one wildcard."
(let (ret)
(while pattern
(let ((fixed "")
wildcards)
;; Pop strings from PATTERN and concatenate them.
(while (stringp (car-safe pattern))
(setq fixed (concat fixed (pop pattern))))
;; Pop wildcards from PATTERN.
(while (and pattern (symbolp (car-safe pattern)))
(push (pop pattern) wildcards))
;; The sublist is a fixed string followed by all the wildcards.
(push (cons fixed (nreverse wildcards)) ret)))
(nreverse ret)))
(defun completion-pcm--segments->regex (segments &optional group)
(concat "\\`"
(mapconcat
(lambda (segment)
(concat
(regexp-quote (car segment))
(when (cdr segment)
(concat
(when group "\\(")
(if (cl-every (lambda (x) (eq x 'any-delim)) (cdr segment))
(concat completion-pcm--delim-wild-regex "*?")
"[^z-a]*?")
(when group "\\)")))))
segments
"")))
(defun completion-pcm--pattern->regex (pattern &optional group)
(completion-pcm--segments->regex (completion-pcm--pattern->segments pattern) group))
(defun completion-pcm--segments-point-idx (segments)
"Return index of subgroup corresponding to `point' element of SEGMENTS.
Return nil if there's no such element.
This is used in combination with `completion-pcm--segments->regex'."
(let ((idx nil)
(i 0))
(dolist (x pattern)
(unless (stringp x)
(incf i)
(if (eq x 'point) (setq idx i))))
(dolist (x segments)
(incf i)
(when (memq 'point (cdr x))
(setq idx i)))
idx))
(defun completion-pcm--all-completions (prefix pattern table pred)
@ -4551,8 +4574,9 @@ see) for later lazy highlighting."
completion-lazy-hilit-fn nil)
(cond
((and completions (cl-loop for e in pattern thereis (stringp e)))
(let* ((re (completion-pcm--pattern->regex pattern 'group))
(point-idx (completion-pcm--pattern-point-idx pattern)))
(let* ((segments (completion-pcm--pattern->segments pattern))
(re (completion-pcm--segments->regex segments 'group))
(point-idx (completion-pcm--segments-point-idx segments)))
(setq completion-pcm--regexp re)
(cond (completion-lazy-hilit
(setq completion-lazy-hilit-fn
@ -4696,12 +4720,13 @@ the same set of elements."
(cond
((null (cdr strs)) (list (car strs)))
(t
(let ((re (completion-pcm--pattern->regex pattern 'group))
(let ((segmented (completion-pcm--pattern->segments (append pattern '(any))))
(ccs ())) ;Chopped completions.
;; First chop each string into the parts corresponding to each
;; non-constant element of `pattern', using regexp-matching.
(let ((case-fold-search completion-ignore-case))
(let ((re (concat (completion-pcm--segments->regex segmented t) "\\'"))
(case-fold-search completion-ignore-case))
(dolist (str strs)
(unless (string-match re str)
(error "Internal error: %s doesn't match %s" str re))
@ -4713,24 +4738,15 @@ the same set of elements."
(push (substring str last next) chopped)
(setq last next)
(setq i (1+ i)))
;; Add the text corresponding to the implicit trailing `any'.
(push (substring str last) chopped)
(push (nreverse chopped) ccs))))
;; Then for each of those non-constant elements, extract the
;; commonality between them.
(let ((res ())
(fixed "")
;; Accumulate each stretch of wildcards, and process them as a unit.
(wildcards ()))
;; Make the implicit trailing `any' explicit.
(dolist (elem (append pattern '(any)))
(if (stringp elem)
(progn
(setq fixed (concat fixed elem))
(setq wildcards nil))
(let ((res ()))
(dolist (elem segmented)
(let ((fixed (car elem))
(wildcards (cdr elem)))
(let ((comps ()))
(push elem wildcards)
(dolist (cc (prog1 ccs (setq ccs nil)))
(push (car cc) comps)
(push (cdr cc) ccs))
@ -4768,7 +4784,8 @@ the same set of elements."
(push prefix res)
;; Push all the wildcards in this stretch, to preserve `point' and
;; `star' wildcards before ELEM.
(setq res (append wildcards res))
(dolist (wildcard wildcards)
(push wildcard res))
;; Extract common suffix additionally to common prefix.
;; Don't do it for `any' since it could lead to a merged
;; completion that doesn't itself match the candidates.
@ -4786,10 +4803,7 @@ the same set of elements."
comps))))))
(cl-assert (stringp suffix))
(unless (equal suffix "")
(push suffix res))))
;; We pushed these wildcards on RES, so we're done with them.
(setq wildcards nil))
(setq fixed "")))))
(push suffix res)))))))))
;; We return it in reverse order.
res)))))

View file

@ -268,7 +268,29 @@
(should (null
(completion--pcm-first-difference-pos
(car (completion-pcm-all-completions
"f" '("few" "many") nil 0))))))
"f" '("few" "many") nil 0)))))
(should (equal
(completion--pcm-first-difference-pos
(car (completion-pcm-all-completions
"a*" '("ab" "ac") nil 2)))
1))
(should (equal
(completion--pcm-first-difference-pos
(car (completion-pcm-all-completions
"a*" '("ab" "ac") nil 1)))
1))
(should (equal
(completion--pcm-first-difference-pos
(car (completion-pcm-all-completions
"a*x" '("abx" "acx") nil 2)))
1))
(should (equal
(completion--pcm-first-difference-pos
(car (completion-pcm-all-completions
"a*x" '("abxd" "acxe") nil 3)))
;; FIXME: the highlighting should start at the 4th character
;; rather than the third.
3)))
(ert-deftest completion-pcm-test-6 ()
;; Wildcards and delimiters work
@ -339,6 +361,20 @@
"-x" '("-_.x" "-__x") nil 2)
'("-_x" . 3))))
(ert-deftest completion-pcm-test-pattern->regex ()
(should (equal (completion-pcm--pattern->regex
'("A" any prefix "B" point "C"))
"\\`A[^z-a]*?B[^z-a]*?C"))
(should (equal (completion-pcm--pattern->regex
'(any any-delim prefix "A" "B" "C"))
"\\`[^z-a]*?ABC"))
(should (equal (completion-pcm--pattern->regex
'(any-delim "A" "B" star "C"))
"\\`[-_./:| *]*?AB[^z-a]*?C"))
(should (equal (completion-pcm--pattern->regex
'(any "A" any-delim "B" any-delim "C" any))
"\\`[^z-a]*?A[-_./:| *]*?B[-_./:| *]*?C[^z-a]*?")))
(ert-deftest completion-pcm-bug4219 ()
;; With `completion-ignore-case', try-completion should change the
;; case of existing text when the completions have different casing.