* lisp/char-fold.el (char-fold-to-regexp): Implement arg LAX (bug#36398).

* test/lisp/char-fold-tests.el (char-fold--test-multi-lax): New test.
This commit is contained in:
Juri Linkov 2019-07-04 23:49:33 +03:00
parent 4a754df885
commit 19b1cefa3b
2 changed files with 45 additions and 27 deletions

View file

@ -148,12 +148,18 @@ Exceptionally for the space character (32), ALIST is ignored.")
(make-list n (or (aref char-fold-table ?\s) " ")))))
;;;###autoload
(defun char-fold-to-regexp (string &optional _lax from)
(defun char-fold-to-regexp (string &optional lax from)
"Return a regexp matching anything that char-folds into STRING.
Any character in STRING that has an entry in
`char-fold-table' is replaced with that entry (which is a
regexp) and other characters are `regexp-quote'd.
When LAX is non-nil, then the final character also matches ligatures
partially, for instance, the search string \"f\" will match \"fi\",
so when typing the search string in isearch while the cursor is on
a ligature, the search won't try to immediately advance to the next
complete match, but will stay on the partially matched ligature.
If the resulting regexp would be too long for Emacs to handle,
just return the result of calling `regexp-quote' on STRING.
@ -183,36 +189,40 @@ from which to start."
;; Long string. The regexp would probably be too long.
(alist (unless (> end 50)
(aref multi-char-table c))))
(push (let ((matched-entries nil)
(max-length 0))
(dolist (entry alist)
(let* ((suffix (car entry))
(len-suf (length suffix)))
(when (eq (compare-strings suffix 0 nil
string (1+ i) (+ i 1 len-suf)
nil)
t)
(push (cons len-suf (cdr entry)) matched-entries)
(setq max-length (max max-length len-suf)))))
;; If no suffixes matched, just go on.
(if (not matched-entries)
regexp
(push (if (and lax alist (= (1+ i) end))
(concat "\\(?:" regexp "\\|"
(mapconcat (lambda (entry)
(cdr entry)) alist "\\|") "\\)")
(let ((matched-entries nil)
(max-length 0))
(dolist (entry alist)
(let* ((suffix (car entry))
(len-suf (length suffix)))
(when (eq (compare-strings suffix 0 nil
string (1+ i) (+ i 1 len-suf)
nil)
t)
(push (cons len-suf (cdr entry)) matched-entries)
(setq max-length (max max-length len-suf)))))
;; If no suffixes matched, just go on.
(if (not matched-entries)
regexp
;;; If N suffixes match, we "branch" out into N+1 executions for the
;;; length of the longest match. This means "fix" will match "fix" but
;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html
(let ((subs (substring string (1+ i) (+ i 1 max-length))))
;; `i' is still going to inc by 1 below.
(setq i (+ i max-length))
(concat
"\\(?:"
(mapconcat (lambda (entry)
(let ((length (car entry))
(suffix-regexp (cdr entry)))
(concat suffix-regexp
(char-fold-to-regexp subs nil length))))
`((0 . ,regexp) . ,matched-entries) "\\|")
"\\)"))))
(let ((subs (substring string (1+ i) (+ i 1 max-length))))
;; `i' is still going to inc by 1 below.
(setq i (+ i max-length))
(concat
"\\(?:"
(mapconcat (lambda (entry)
(let ((length (car entry))
(suffix-regexp (cdr entry)))
(concat suffix-regexp
(char-fold-to-regexp subs nil length))))
`((0 . ,regexp) . ,matched-entries) "\\|")
"\\)")))))
out))))
(setq i (1+ i)))
(when (> spaces 0)

View file

@ -82,6 +82,14 @@
(set-char-table-extra-slot char-fold-table 0 multi)
(char-fold--test-match-exactly (car it) (cdr it)))))
(ert-deftest char-fold--test-multi-lax ()
(dolist (it '(("f" . "") ("f" . "")))
(with-temp-buffer
(insert (cdr it))
(goto-char (point-min))
(should (search-forward-regexp
(char-fold-to-regexp (car it) 'lax) nil 'noerror)))))
(ert-deftest char-fold--test-fold-to-regexp ()
(let ((char-fold-table (make-char-table 'char-fold-table))
(multi (make-char-table 'char-fold-table)))