mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-02-16 17:24:23 +00:00
Optimize PCM regex to not contain adjacent wildcards
When multiple wildcards occur in a PCM pattern, completion-pcm--pattern->regex previously would generate one instance of [^z-a]* for each of those wildcards, even if the wildcards were adjacent and could therefore be matched by a single [^z-a]*. This can make regex matching performance much worse. For example, with a minibuffer containing "*/" with point at the start, completion-pcm-all-completions would take several seconds to match in project-find-file in the Emacs repo. Now, we run completion-pcm--pattern->segments on the pattern first, which adds additional structure to the pattern, including consolidating adjacent regexes into a single sublist. Then completion-pcm--segments->regex generates a single regex wildcard for each of those pattern wildcards. As a consequence, we need to update the callers of completion-pcm--pattern->regex which pass non-nil GROUP. This provides a substantial performance improvement in various edge cases. * lisp/minibuffer.el (completion-pcm--pattern->segments) (completion-pcm--segments->regex): Add. (bug#79693) (completion-pcm--pattern->regex) (completion-pcm--merge-completions): Call pattern->segments and segments->regex. (completion-pcm--pattern-point-idx): Delete. (completion-pcm--segments-point-idx): Add. (completion-pcm--hilit-commonality): Call completion-pcm--segments-point-idx to find the submatch containing point. * test/lisp/minibuffer-tests.el (completion-pcm-test-5): Add more tests of highlighting the first difference. (completion-pcm-test-pattern->regex): Add tests showing the regex form of PCM patterns.
This commit is contained in:
parent
1f00179df9
commit
97d2a659e8
2 changed files with 100 additions and 50 deletions
|
|
@ -4291,36 +4291,59 @@ or a symbol, see `completion-pcm--merge-completions'."
|
|||
(_ (push (pop p) n))))
|
||||
(nreverse n)))
|
||||
|
||||
(defun completion-pcm--pattern->regex (pattern &optional group)
|
||||
(let ((re
|
||||
(concat "\\`"
|
||||
(mapconcat
|
||||
(lambda (x)
|
||||
(cond
|
||||
((stringp x) (regexp-quote x))
|
||||
(t
|
||||
(let ((re (if (eq x 'any-delim)
|
||||
(concat completion-pcm--delim-wild-regex "*?")
|
||||
"[^z-a]*?")))
|
||||
(if (if (consp group) (memq x group) group)
|
||||
(concat "\\(" re "\\)")
|
||||
re)))))
|
||||
pattern
|
||||
""))))
|
||||
;; Avoid pathological backtracking.
|
||||
(while (string-match "\\.\\*\\?\\(?:\\\\[()]\\)*\\(\\.\\*\\?\\)" re)
|
||||
(setq re (replace-match "" t t re 1)))
|
||||
re))
|
||||
(defun completion-pcm--pattern->segments (pattern)
|
||||
"Segment PATTERN into more structured sublists.
|
||||
|
||||
(defun completion-pcm--pattern-point-idx (pattern)
|
||||
"Return index of subgroup corresponding to `point' element of PATTERN.
|
||||
Return nil if there's no such element."
|
||||
Returns a list of lists which when concatenated is semantically the same
|
||||
as PATTERN.
|
||||
|
||||
The first element in each sublist is a (possibly empty) string. The
|
||||
remaining elements in the sublist are all wildcard symbols. If PATTERN
|
||||
ends with a wildcard, then each sublist is guaranteed to have at least
|
||||
one wildcard."
|
||||
(let (ret)
|
||||
(while pattern
|
||||
(let ((fixed "")
|
||||
wildcards)
|
||||
;; Pop strings from PATTERN and concatenate them.
|
||||
(while (stringp (car-safe pattern))
|
||||
(setq fixed (concat fixed (pop pattern))))
|
||||
;; Pop wildcards from PATTERN.
|
||||
(while (and pattern (symbolp (car-safe pattern)))
|
||||
(push (pop pattern) wildcards))
|
||||
;; The sublist is a fixed string followed by all the wildcards.
|
||||
(push (cons fixed (nreverse wildcards)) ret)))
|
||||
(nreverse ret)))
|
||||
|
||||
(defun completion-pcm--segments->regex (segments &optional group)
|
||||
(concat "\\`"
|
||||
(mapconcat
|
||||
(lambda (segment)
|
||||
(concat
|
||||
(regexp-quote (car segment))
|
||||
(when (cdr segment)
|
||||
(concat
|
||||
(when group "\\(")
|
||||
(if (cl-every (lambda (x) (eq x 'any-delim)) (cdr segment))
|
||||
(concat completion-pcm--delim-wild-regex "*?")
|
||||
"[^z-a]*?")
|
||||
(when group "\\)")))))
|
||||
segments
|
||||
"")))
|
||||
|
||||
(defun completion-pcm--pattern->regex (pattern &optional group)
|
||||
(completion-pcm--segments->regex (completion-pcm--pattern->segments pattern) group))
|
||||
|
||||
(defun completion-pcm--segments-point-idx (segments)
|
||||
"Return index of subgroup corresponding to `point' element of SEGMENTS.
|
||||
Return nil if there's no such element.
|
||||
This is used in combination with `completion-pcm--segments->regex'."
|
||||
(let ((idx nil)
|
||||
(i 0))
|
||||
(dolist (x pattern)
|
||||
(unless (stringp x)
|
||||
(incf i)
|
||||
(if (eq x 'point) (setq idx i))))
|
||||
(dolist (x segments)
|
||||
(incf i)
|
||||
(when (memq 'point (cdr x))
|
||||
(setq idx i)))
|
||||
idx))
|
||||
|
||||
(defun completion-pcm--all-completions (prefix pattern table pred)
|
||||
|
|
@ -4551,8 +4574,9 @@ see) for later lazy highlighting."
|
|||
completion-lazy-hilit-fn nil)
|
||||
(cond
|
||||
((and completions (cl-loop for e in pattern thereis (stringp e)))
|
||||
(let* ((re (completion-pcm--pattern->regex pattern 'group))
|
||||
(point-idx (completion-pcm--pattern-point-idx pattern)))
|
||||
(let* ((segments (completion-pcm--pattern->segments pattern))
|
||||
(re (completion-pcm--segments->regex segments 'group))
|
||||
(point-idx (completion-pcm--segments-point-idx segments)))
|
||||
(setq completion-pcm--regexp re)
|
||||
(cond (completion-lazy-hilit
|
||||
(setq completion-lazy-hilit-fn
|
||||
|
|
@ -4696,12 +4720,13 @@ the same set of elements."
|
|||
(cond
|
||||
((null (cdr strs)) (list (car strs)))
|
||||
(t
|
||||
(let ((re (completion-pcm--pattern->regex pattern 'group))
|
||||
(let ((segmented (completion-pcm--pattern->segments (append pattern '(any))))
|
||||
(ccs ())) ;Chopped completions.
|
||||
|
||||
;; First chop each string into the parts corresponding to each
|
||||
;; non-constant element of `pattern', using regexp-matching.
|
||||
(let ((case-fold-search completion-ignore-case))
|
||||
(let ((re (concat (completion-pcm--segments->regex segmented t) "\\'"))
|
||||
(case-fold-search completion-ignore-case))
|
||||
(dolist (str strs)
|
||||
(unless (string-match re str)
|
||||
(error "Internal error: %s doesn't match %s" str re))
|
||||
|
|
@ -4713,24 +4738,15 @@ the same set of elements."
|
|||
(push (substring str last next) chopped)
|
||||
(setq last next)
|
||||
(setq i (1+ i)))
|
||||
;; Add the text corresponding to the implicit trailing `any'.
|
||||
(push (substring str last) chopped)
|
||||
(push (nreverse chopped) ccs))))
|
||||
|
||||
;; Then for each of those non-constant elements, extract the
|
||||
;; commonality between them.
|
||||
(let ((res ())
|
||||
(fixed "")
|
||||
;; Accumulate each stretch of wildcards, and process them as a unit.
|
||||
(wildcards ()))
|
||||
;; Make the implicit trailing `any' explicit.
|
||||
(dolist (elem (append pattern '(any)))
|
||||
(if (stringp elem)
|
||||
(progn
|
||||
(setq fixed (concat fixed elem))
|
||||
(setq wildcards nil))
|
||||
(let ((res ()))
|
||||
(dolist (elem segmented)
|
||||
(let ((fixed (car elem))
|
||||
(wildcards (cdr elem)))
|
||||
(let ((comps ()))
|
||||
(push elem wildcards)
|
||||
(dolist (cc (prog1 ccs (setq ccs nil)))
|
||||
(push (car cc) comps)
|
||||
(push (cdr cc) ccs))
|
||||
|
|
@ -4768,7 +4784,8 @@ the same set of elements."
|
|||
(push prefix res)
|
||||
;; Push all the wildcards in this stretch, to preserve `point' and
|
||||
;; `star' wildcards before ELEM.
|
||||
(setq res (append wildcards res))
|
||||
(dolist (wildcard wildcards)
|
||||
(push wildcard res))
|
||||
;; Extract common suffix additionally to common prefix.
|
||||
;; Don't do it for `any' since it could lead to a merged
|
||||
;; completion that doesn't itself match the candidates.
|
||||
|
|
@ -4786,10 +4803,7 @@ the same set of elements."
|
|||
comps))))))
|
||||
(cl-assert (stringp suffix))
|
||||
(unless (equal suffix "")
|
||||
(push suffix res))))
|
||||
;; We pushed these wildcards on RES, so we're done with them.
|
||||
(setq wildcards nil))
|
||||
(setq fixed "")))))
|
||||
(push suffix res)))))))))
|
||||
;; We return it in reverse order.
|
||||
res)))))
|
||||
|
||||
|
|
|
|||
|
|
@ -268,7 +268,29 @@
|
|||
(should (null
|
||||
(completion--pcm-first-difference-pos
|
||||
(car (completion-pcm-all-completions
|
||||
"f" '("few" "many") nil 0))))))
|
||||
"f" '("few" "many") nil 0)))))
|
||||
(should (equal
|
||||
(completion--pcm-first-difference-pos
|
||||
(car (completion-pcm-all-completions
|
||||
"a*" '("ab" "ac") nil 2)))
|
||||
1))
|
||||
(should (equal
|
||||
(completion--pcm-first-difference-pos
|
||||
(car (completion-pcm-all-completions
|
||||
"a*" '("ab" "ac") nil 1)))
|
||||
1))
|
||||
(should (equal
|
||||
(completion--pcm-first-difference-pos
|
||||
(car (completion-pcm-all-completions
|
||||
"a*x" '("abx" "acx") nil 2)))
|
||||
1))
|
||||
(should (equal
|
||||
(completion--pcm-first-difference-pos
|
||||
(car (completion-pcm-all-completions
|
||||
"a*x" '("abxd" "acxe") nil 3)))
|
||||
;; FIXME: the highlighting should start at the 4th character
|
||||
;; rather than the third.
|
||||
3)))
|
||||
|
||||
(ert-deftest completion-pcm-test-6 ()
|
||||
;; Wildcards and delimiters work
|
||||
|
|
@ -339,6 +361,20 @@
|
|||
"-x" '("-_.x" "-__x") nil 2)
|
||||
'("-_x" . 3))))
|
||||
|
||||
(ert-deftest completion-pcm-test-pattern->regex ()
|
||||
(should (equal (completion-pcm--pattern->regex
|
||||
'("A" any prefix "B" point "C"))
|
||||
"\\`A[^z-a]*?B[^z-a]*?C"))
|
||||
(should (equal (completion-pcm--pattern->regex
|
||||
'(any any-delim prefix "A" "B" "C"))
|
||||
"\\`[^z-a]*?ABC"))
|
||||
(should (equal (completion-pcm--pattern->regex
|
||||
'(any-delim "A" "B" star "C"))
|
||||
"\\`[-_./:| *]*?AB[^z-a]*?C"))
|
||||
(should (equal (completion-pcm--pattern->regex
|
||||
'(any "A" any-delim "B" any-delim "C" any))
|
||||
"\\`[^z-a]*?A[-_./:| *]*?B[-_./:| *]*?C[^z-a]*?")))
|
||||
|
||||
(ert-deftest completion-pcm-bug4219 ()
|
||||
;; With `completion-ignore-case', try-completion should change the
|
||||
;; case of existing text when the completions have different casing.
|
||||
|
|
|
|||
Loading…
Reference in a new issue