Correctly handle non-BMP characters in Android content file names

* lisp/term/android-win.el (android-encode-jni)
(android-decode-jni, android-jni): New coding system, for
Android file names and runtime data.

* src/androidterm.h (syms_of_androidvfs): New function.

* src/androidvfs.c (struct android_special_vnode): New field
special_coding_system.
(android_saf_tree_readdir): Decode the file name using the
android-jni coding system.
(special_vnodes): <contents>: Specify a file name coding system.
(android_vfs_convert_name): New function.
(android_root_name): If a special coding system be specified for
a special vnode, convert components to it before invoking its
name function.
(syms_of_androidvfs): New symbol Qandroid_jni.

* src/emacs.c (android_emacs_init): Call syms_of_androidvfs.
This commit is contained in:
Po Lu 2024-03-23 15:37:43 +08:00
parent 7e32e8392a
commit e39cb515a1
4 changed files with 215 additions and 17 deletions

View file

@ -528,6 +528,95 @@ accessible to other programs."
(setq url replacement-url))
(android-browse-url-internal url send))
;; Coding systems used by androidvfs.c.
;; CCL encoder for the `android-jni' coding system.  Each iteration of
;; the loop reads one character into r0 and writes the bytes that
;; represent it:
;;
;;   - character 0 is written as the two bytes #xc0 #x80;
;;   - characters below #x80 are written as themselves;
;;   - characters below #x800 are written as two bytes;
;;   - characters below #x10000 are written as three bytes;
;;   - anything larger is split into a high surrogate (r1) and a low
;;     surrogate (r2), each of which is written as three bytes.
(define-ccl-program android-encode-jni
`(2 ((loop
(read r0)
(if (r0 < #x1) ; 0x0 is encoded specially in JNI environments.
((write #xc0)
(write #x80))
((if (r0 < #x80) ; ASCII
((write r0))
(if (r0 < #x800) ; \u0080 - \u07ff
;; Lead byte carries the bits above the low six; the
;; continuation byte carries the low six bits.
((write ((r0 >> 6) | #xC0))
(write ((r0 & #x3F) | #x80)))
;; \u0800 - \uFFFF
(if (r0 < #x10000)
((write ((r0 >> 12) | #xE0))
(write (((r0 >> 6) & #x3F) | #x80))
(write ((r0 & #x3F) | #x80)))
;; Supplementary characters must be converted into
;; surrogate pairs before encoding.
(;; High surrogate
;; (the upper ten bits of the offset from #x10000.)
(r1 = ((((r0 - #x10000) >> 10) & #x3ff) + #xD800))
;; Low surrogate.
;; (the lower ten bits of the offset from #x10000.)
(r2 = (((r0 - #x10000) & #x3ff) + #xDC00))
;; Write both surrogate characters.
;; Each is emitted with the same three-byte layout used
;; above for \u0800 - \uFFFF.
(write ((r1 >> 12) | #xE0))
(write (((r1 >> 6) & #x3F) | #x80))
(write ((r1 & #x3F) | #x80))
(write ((r2 >> 12) | #xE0))
(write (((r2 >> 6) & #x3F) | #x80))
(write ((r2 & #x3F) | #x80))))))))
;; Process the next character.
(repeat))))
"Encode characters from the input buffer for Java virtual machines.")
;; CCL decoder for the `android-jni' coding system.  Each iteration of
;; the loop reads one lead byte into r0, assembles the code point it
;; introduces from the continuation bytes that follow, recombines
;; surrogate pairs into a single supplementary character, and writes
;; the result.
;;
;; Registers: r0 holds the lead byte and then the decoded character;
;; r1, r4 and r6 hold continuation bytes; r2 holds the lead byte of
;; the sequence following a high surrogate.
(define-ccl-program android-decode-jni
  `(1 ((loop
        ;; Bytes below #x80 are ASCII and are written unchanged.
        ((read-if (r0 >= #x80)      ; More than a one-byte sequence?
                  ((if (r0 < #xe0)
                       ;; Two-byte sequence; potentially a NULL
                       ;; character (#xc0 #x80 decodes to 0).
                       ((read r4)
                        (r4 &= #x3f)
                        (r0 = (((r0 & #x1f) << 6) | r4)))
                     (if (r0 < #xf0)
                         ;; Three-byte sequence, after which surrogate
                         ;; pairs should be processed.
                         ((read r4 r6)
                          (r4 = ((r4 & #x3f) << 6))
                          (r6 &= #x3f)
                          (r0 = ((((r0 & #xf) << 12) | r4) | r6)))
                       ;; Four-byte sequences are not valid under the
                       ;; JVM specification, but Android produces them
                       ;; when encoding Emoji characters for being
                       ;; supposedly less of a surprise to
                       ;; applications.  This is obviously not true of
                       ;; programs written to the letter of the
                       ;; documentation, but 50 million Frenchmen make
                       ;; a right (and this deviation from the norm is
                       ;; predictably absent from Android's
                       ;; documentation on the subject).
                       ((read r1 r4 r6)
                        (r1 = ((r1 & #x3f) << 12))
                        (r4 = ((r4 & #x3f) << 6))
                        (r6 &= #x3f)
                        (r0 = (((((r0 & #x07) << 18) | r1) | r4) | r6))))))))
        ;; High surrogate (#xd800 - #xdbff)?  The mask covers every
        ;; bit a four-byte sequence can set, so that supplementary
        ;; characters such as U+1D800 are not mistaken for surrogates,
        ;; and excludes low surrogates, which never start a pair.
        (if ((r0 & #x1ffc00) == #xd800)
            ;; Read the lead byte of the next sequence into r2; the
            ;; pair is only combined if a three-byte sequence follows.
            ((read-if (r2 >= #xe0)
                      ((r0 = ((r0 & #x3ff) << 10))
                       (read r4 r6)
                       (r4 = ((r4 & #x3f) << 6))
                       (r6 &= #x3f)
                       (r1 = ((((r2 & #xf) << 12) | r4) | r6))
                       ;; The pair encodes the character's offset from
                       ;; #x10000: ten bits in each surrogate.
                       (r0 = (((r1 & #x3ff) | r0) + #x10000))))))
        (write r0)
        (repeat))))
  "Decode JVM-encoded characters in the input buffer.")
;; Register the `android-jni' coding system.  It is implemented by the
;; two CCL programs named under `:ccl-decoder' and `:ccl-encoder', and
;; is referred to from C as Qandroid_jni.
(define-coding-system 'android-jni
"CESU-8 based encoding for communication with the Android runtime."
:mnemonic ?J
:coding-type 'ccl
:eol-type 'unix
:ascii-compatible-p nil ; for \0 is encoded as a two-byte sequence.
:default-char ?\0
:charset-list '(unicode)
:ccl-decoder 'android-decode-jni
:ccl-encoder 'android-encode-jni)
(provide 'android-win)
;; android-win.el ends here.

View file

@ -461,7 +461,7 @@ extern void sfntfont_android_shrink_scanline_buffer (void);
extern void init_sfntfont_android (void);
extern void syms_of_sfntfont_android (void);
/* Defined in androidselect.c */
/* Defined in androidselect.c. */
#ifndef ANDROID_STUBIFY
@ -473,6 +473,9 @@ extern void android_notification_action (struct android_notification_event *,
extern void init_androidselect (void);
extern void syms_of_androidselect (void);
/* Defined in androidvfs.c. */
extern void syms_of_androidvfs (void);
#endif

View file

@ -38,8 +38,10 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include <linux/ashmem.h>
#include "android.h"
#include "androidterm.h"
#include "systime.h"
#include "blockinput.h"
#include "coding.h"
#if __ANDROID_API__ >= 9
#include <android/asset_manager.h>
@ -248,8 +250,14 @@ struct android_special_vnode
/* Function called to create the initial vnode from the rest of the
component. */
struct android_vnode *(*initial) (char *, size_t);
/* If non-nil, an encoding system into which file name buffers are to
be re-encoded before being handed to VFS functions. */
Lisp_Object special_coding_system;
};
verify (NIL_IS_ZERO); /* special_coding_system above. */
enum android_vnode_type
{
ANDROID_VNODE_UNIX,
@ -3867,7 +3875,8 @@ android_saf_root_readdir (struct android_vdir *vdir)
NULL);
android_exception_check_nonnull ((void *) chars, string);
/* Figure out how large it is, and then resize dirent to fit. */
/* Figure out how large it is, and then resize dirent to fit--this
string is always ASCII. */
length = strlen (chars) + 1;
size = offsetof (struct dirent, d_name) + length;
dirent = xrealloc (dirent, size);
@ -5479,6 +5488,7 @@ android_saf_tree_readdir (struct android_vdir *vdir)
jmethodID method;
size_t length, size;
const char *chars;
struct coding_system coding;
dir = (struct android_saf_tree_vdir *) vdir;
@ -5526,9 +5536,25 @@ android_saf_tree_readdir (struct android_vdir *vdir)
NULL);
android_exception_check_nonnull ((void *) chars, d_name);
/* Figure out how large it is, and then resize dirent to fit. */
/* Decode this JNI string into utf-8-emacs; see
android_vfs_convert_name for considerations regarding coding
systems. */
length = strlen (chars);
setup_coding_system (Qandroid_jni, &coding);
coding.mode |= CODING_MODE_LAST_BLOCK;
coding.source = (const unsigned char *) chars;
coding.dst_bytes = 0;
coding.destination = NULL;
decode_coding_object (&coding, Qnil, 0, 0, length, length, Qnil);
/* Release the string data and the local reference to STRING. */
(*android_java_env)->ReleaseStringUTFChars (android_java_env,
(jstring) d_name,
chars);
/* Resize dirent to accommodate the decoded text. */
length = strlen (chars) + 1;
size = offsetof (struct dirent, d_name) + length;
size = offsetof (struct dirent, d_name) + 1 + coding.produced;
dirent = xrealloc (dirent, size);
/* Clear dirent. */
@ -5540,12 +5566,12 @@ android_saf_tree_readdir (struct android_vdir *vdir)
dirent->d_off = 0;
dirent->d_reclen = size;
dirent->d_type = d_type ? DT_DIR : DT_UNKNOWN;
strcpy (dirent->d_name, chars);
memcpy (dirent->d_name, coding.destination, coding.produced);
dirent->d_name[coding.produced] = '\0';
/* Free the coding system destination buffer. */
xfree (coding.destination);
/* Release the string data and the local reference to STRING. */
(*android_java_env)->ReleaseStringUTFChars (android_java_env,
(jstring) d_name,
chars);
ANDROID_DELETE_LOCAL_REF (d_name);
return dirent;
}
@ -6531,9 +6557,35 @@ static struct android_vops root_vfs_ops =
static struct android_special_vnode special_vnodes[] =
{
{ "assets", 6, android_afs_initial, },
{ "content", 7, android_content_initial, },
{ "content", 7, android_content_initial,
LISPSYM_INITIALLY (Qandroid_jni), },
};
/* Re-encode the file name NAME, which is taken to be in Emacs's
   internal representation (utf-8-emacs), into the coding system
   CODING.  Value is a Lisp string holding the text so produced.

   NAME is not decoded with file-name-coding-system, so calling this
   function implicitly assumes that coding system to be compatible
   with utf-8-emacs.  This is deemed acceptable: users with cause to
   modify file-name-coding-system should be aware of, and prepared
   for, the consequences towards files stored on different
   filesystems, virtual ones included.  */

static Lisp_Object
android_vfs_convert_name (const char *name, Lisp_Object coding)
{
  Lisp_Object decoded;

  /* Wrap NAME in a Lisp string and decode it from utf-8-emacs.  */
  AUTO_STRING (file_name, name);
  decoded = code_convert_string_norecord (file_name, Qutf_8_emacs,
					  false);

  /* Now encode the decoded text into CODING and hand back the
     result.  */
  return code_convert_string (decoded, coding, Qt, true, true, true);
}
static struct android_vnode *
android_root_name (struct android_vnode *vnode, char *name,
size_t length)
@ -6541,6 +6593,8 @@ android_root_name (struct android_vnode *vnode, char *name,
char *component_end;
struct android_special_vnode *special;
size_t i;
Lisp_Object file_name;
struct android_vnode *vp;
/* Skip any leading separator in NAME. */
@ -6567,8 +6621,29 @@ android_root_name (struct android_vnode *vnode, char *name,
if (component_end - name == special->length
&& !memcmp (special->name, name, special->length))
return (*special->initial) (component_end,
length - special->length);
{
if (!NILP (special->special_coding_system))
{
USE_SAFE_ALLOCA;
file_name
= android_vfs_convert_name (component_end,
special->special_coding_system);
/* Allocate a buffer and copy file_name into the same. */
length = SBYTES (file_name) + 1;
name = SAFE_ALLOCA (length + 1);
/* Copy the trailing NULL byte also. */
memcpy (name, SDATA (file_name), length);
vp = (*special->initial) (name, length - 1);
SAFE_FREE ();
return vp;
}
return (*special->initial) (component_end,
length - special->length);
}
/* Detect the case where a special is named with a trailing
directory separator. */
@ -6576,9 +6651,30 @@ android_root_name (struct android_vnode *vnode, char *name,
if (component_end - name == special->length + 1
&& !memcmp (special->name, name, special->length)
&& name[special->length] == '/')
/* Make sure to include the directory separator. */
return (*special->initial) (component_end - 1,
length - special->length);
{
if (!NILP (special->special_coding_system))
{
USE_SAFE_ALLOCA;
file_name
= android_vfs_convert_name (component_end - 1,
special->special_coding_system);
/* Allocate a buffer and copy file_name into the same. */
length = SBYTES (file_name) + 1;
name = SAFE_ALLOCA (length + 1);
/* Copy the trailing NULL byte also. */
memcpy (name, SDATA (file_name), length);
vp = (*special->initial) (name, length - 1);
SAFE_FREE ();
return vp;
}
/* Make sure to include the directory separator. */
return (*special->initial) (component_end - 1,
length - special->length);
}
}
/* Otherwise, continue searching for a vnode normally. */
@ -6589,8 +6685,9 @@ android_root_name (struct android_vnode *vnode, char *name,
/* File system lookup. */
/* Look up the vnode that designates NAME, a file name that is at
least N bytes.
/* Look up the vnode that designates NAME, a file name that is at least
N bytes, converting between different file name coding systems as
necessary.
NAME may be either an absolute file name or a name relative to the
current working directory. It must not be longer than EMACS_PATH_MAX
@ -7605,3 +7702,11 @@ android_closedir (struct android_vdir *dirp)
{
return (*dirp->closedir) (dirp);
}
/* Set up the Lisp symbols used by this file.  At present this is only
   Qandroid_jni, which names the `android-jni' coding system.  */
void
syms_of_androidvfs (void)
{
DEFSYM (Qandroid_jni, "android-jni");
}

View file

@ -2444,6 +2444,7 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem
#if !defined ANDROID_STUBIFY
syms_of_androidfont ();
syms_of_androidselect ();
syms_of_androidvfs ();
syms_of_sfntfont ();
syms_of_sfntfont_android ();
#endif /* !ANDROID_STUBIFY */