mirror of
git://git.sv.gnu.org/emacs.git
synced 2026-02-16 17:24:23 +00:00
Minor fixes in filepos-to-bufferpos
* lisp/international/mule-util.el (filepos-to-bufferpos): Remove test for utf-8-emacs. Exempt single-byte encodings from the 'use-exact' path when QUALITY is 'exact'. Test UTF-16 encodings for BOM before subtracting 2 bytes. Use 'identity' when adjusting UTF-16 encoded files for CR-LF EOLs.
This commit is contained in:
parent
c4151ebe15
commit
a2bb6c7bc1
1 changed files with 9 additions and 7 deletions
|
|
@ -353,15 +353,10 @@ QUALITY can be:
|
|||
(type (coding-system-type coding-system))
|
||||
(base (coding-system-base coding-system))
|
||||
(pm (save-restriction (widen) (point-min))))
|
||||
(and (eq type 'utf-8-emacs)
|
||||
(setq type 'utf-8))
|
||||
(and (eq type 'utf-8)
|
||||
;; Any post-read/pre-write conversions mean it's not really UTF-8.
|
||||
(not (null (coding-system-get coding-system :pos-read-conversion)))
|
||||
(setq type 'not-utf-8))
|
||||
(and (not (eq type 'utf-8))
|
||||
(eq quality 'exact)
|
||||
(setq type 'use-exact))
|
||||
(and (memq type '(charset raw-text undecided))
|
||||
;; The following are all of type 'charset', but they are
|
||||
;; actually variable-width encodings.
|
||||
|
|
@ -370,6 +365,12 @@ QUALITY can be:
|
|||
japanese-iso-8bit chinese-big5-hkscs
|
||||
japanese-cp932 korean-cp949)))
|
||||
(setq type 'single-byte))
|
||||
;; Any encoding that's not single-byte and not UTF-8 must use the
|
||||
;; 'exact' path if QUALITY is 'exact', because we have no simple
|
||||
;; mappings for those cases.
|
||||
(and (not (memq type '(utf-8 single-byte)))
|
||||
(eq quality 'exact)
|
||||
(setq type 'use-exact))
|
||||
(pcase type
|
||||
(`utf-8
|
||||
(when (coding-system-get coding-system :bom)
|
||||
|
|
@ -379,12 +380,13 @@ QUALITY can be:
|
|||
(byte-to-position (+ pm byte))))
|
||||
(`utf-16
|
||||
;; Account for BOM, which is always 2 bytes in UTF-16.
|
||||
(setq byte (- byte 2))
|
||||
(when (coding-system-get coding-system :bom)
|
||||
(setq byte (- byte 2)))
|
||||
;; In approximate mode, assume all characters are within the
|
||||
;; BMP, i.e. take up 2 bytes.
|
||||
(setq byte (/ byte 2))
|
||||
(if (= eol 1)
|
||||
(filepos-to-bufferpos--dos (+ pm byte) #'byte-to-position)
|
||||
(filepos-to-bufferpos--dos (+ pm byte) #'identity)
|
||||
(byte-to-position (+ pm byte))))
|
||||
(`single-byte
|
||||
(if (= eol 1)
|
||||
|
|
|
|||
Loading…
Reference in a new issue