Fix VIQR decoding and encoding

* lisp/language/viet-util.el (viqr-regexp): Include escaped
punctuation characters, to handle escaped punctuation correctly.
(viqr-punctuation-regexp): New defconst.
(viet-encode-viqr-region): Backslash-escape punctuation characters
matching 'viqr-punctuation-regexp' before encoding Vietnamese
characters with diacritics.  (Bug#80039)

* test/lisp/language/viet-util-tests.el (viet-util-test-viqr): New
file with tests for VIQR encoding and decoding.
This commit is contained in:
Eli Zaretskii 2025-12-21 12:52:17 +02:00
parent 1eb247af73
commit 33fab945d3
2 changed files with 44 additions and 3 deletions

View file

@ -220,10 +220,13 @@
)
"Alist of Vietnamese characters vs corresponding `VIQR' string.")
;; Regular expression matching single Vietnamese character represented
;; by VIQR.
(defconst viqr-regexp
"[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]")
"[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]\\|\\\\[(^+'`?~.d\\]"
"Regular expression matching VIQR representation of a single character.")
(defconst viqr-punctuation-regexp
  "[(^+'`?~.]"
  "Regular expression matching a character that must be escaped in VIQR.
It matches every VIQR mark character individually: the modifier
characters (open parenthesis, circumflex, plus sign) as well as the
tone-mark characters (apostrophe, grave accent, question mark, tilde,
period).
The modifier characters are matched even when no tone mark follows
them, because a vowel followed by a bare modifier is itself matched
by `viqr-regexp'.  Without that, literal text such as \"a(n)\" would
be left unescaped when encoding and would then be matched as a VIQR
sequence when decoding.")
;;;###autoload
(defun viet-decode-viqr-region (from to)
@ -257,6 +260,7 @@ positions (integers or markers) specifying the stretch of the region."
(save-restriction
(narrow-to-region from to)
(goto-char (point-min))
(replace-regexp-in-region viqr-punctuation-regexp "\\\\\\&")
(while (re-search-forward "\\cv" nil t)
(let* ((ch (preceding-char))
(viqr (cdr (assq ch viet-viqr-alist))))

View file

@ -0,0 +1,37 @@
;;; viet-util-tests.el --- unit tests for viet-util.el -*- lexical-binding: t; -*-
;; Copyright (C) 2025 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; Unit tests for lisp/language/viet-util.el.
;;; Code:
(ert-deftest viet-util-test-viqr ()
  "Round-trip a sentence through VIQR decoding and encoding (bug#80039).
The VIQR sample contains backslash-escaped punctuation.  Decoding it
must give the Vietnamese sentence, and encoding that sentence again
must give back the original VIQR text."
  (let* ((encoded "O^ng te^n gi`\\? To^i te^n la` Tra^`n Va(n Hie^'u\\.")
         (decoded "Ông tên gì? Tôi tên là Trần Văn Hiếu."))
    (with-temp-buffer
      (insert encoded)
      ;; VIQR -> Vietnamese, over the whole buffer.
      (let ((beg (point-min))
            (end (point-max)))
        (viet-decode-viqr-region beg end))
      (should (equal (buffer-string) decoded))
      ;; Vietnamese -> VIQR; the buffer bounds changed, so recompute them.
      (let ((beg (point-min))
            (end (point-max)))
        (viet-encode-viqr-region beg end))
      (should (equal (buffer-string) encoded)))))
;;; viet-util-tests.el ends here