From 32f9e2109858565166e65da0dabaf48c4baca20c Mon Sep 17 00:00:00 2001 From: Helmut Eller Date: Fri, 27 Feb 2026 18:32:12 +0100 Subject: [PATCH] Introduce a struct charset_table The fields of the new struct are what the global variables charset_table, charset_table_size, charset_table_used, and charset_attributes_table used to be, except that the size and used fields are now unsigned rather than int. The struct should make it clearer that those fields must be kept in sync. * src/charset.h (struct charset_table): New struct. (CHARSET_FROM_ID, charset_attributes_getter): Adjust accordingly. * src/charset.c (charset_table): Change type to struct charset_table. (charset_table_size, charset_table_used, charset_attributes_table): Move to the struct. (Fdefine_charset_internal, Ffind_charset_region, Ffind_charset_string) (shrink_charset_table, syms_of_charset): Adjust to struct charset_table. * src/pdumper.c (dump_charset, dump_charset_table) (dump_cold_charset): Adjust to struct charset_table. --- src/charset.c | 94 ++++++++++++++++++++++++--------------------------- src/charset.h | 30 +++++++++++----- src/pdumper.c | 12 +++---- 3 files changed, 72 insertions(+), 64 deletions(-) diff --git a/src/charset.c b/src/charset.c index ff501022af9..eae81d689b7 100644 --- a/src/charset.c +++ b/src/charset.c @@ -60,16 +60,8 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ charset symbols, and values are vectors of charset attributes. */ Lisp_Object Vcharset_hash_table; -/* Table of struct charset. */ -struct charset *charset_table; -int charset_table_size; -int charset_table_used; - -/* Table of attribute vectors. charset_attributes_table[id] contains - the attribute vector for the charset at charset_table[id]. - - This is a separate vector to simplify GC. */ -Lisp_Object charset_attributes_table; +/* The table of all charsets. */ +struct charset_table charset_table; /* Special charsets corresponding to symbols. */ int charset_ascii; @@ -1128,28 +1120,28 @@ usage: (define-charset-internal ...) 
*/) else { hash_put (hash_table, args[charset_arg_name], attrs, hash_code); - if (charset_table_used == charset_table_size) + if (charset_table.used == charset_table.size) { /* Ensure that charset IDs fit into 'int' as well as into the restriction imposed by fixnums. Although the 'int' restriction could be removed, too much other code would need altering; for example, the IDs are stuffed into struct coding_system.charbuf[i] entries, which are 'int'. */ - int old_size = charset_table_size; + int old_size = charset_table.size; ptrdiff_t new_size = old_size; struct charset *new_table = xpalloc (0, &new_size, 1, min (INT_MAX, MOST_POSITIVE_FIXNUM), - sizeof *charset_table); - memcpy (new_table, charset_table, + sizeof *charset_table.start); + memcpy (new_table, charset_table.start, old_size * sizeof *new_table); - charset_table = new_table; - charset_table_size = new_size; + charset_table.start = new_table; + charset_table.size = new_size; Lisp_Object new_attr_table = make_vector (new_size, Qnil); for (size_t i = 0; i < old_size; i++) ASET (new_attr_table, i, - AREF (charset_attributes_table, i)); - charset_attributes_table = new_attr_table; + AREF (charset_table.attributes_table, i)); + charset_table.attributes_table = new_attr_table; /* FIXME: This leaks memory, as the old charset_table becomes unreachable. If the old charset table is charset_table_init then this leak is intentional; otherwise, it's unclear. @@ -1158,20 +1150,20 @@ usage: (define-charset-internal ...) */) charset_table should be freed, by passing it as the 1st argument to xpalloc and removing the memcpy. 
*/ } - id = charset_table_used++; + id = charset_table.used++; new_definition_p = 1; } ASET (attrs, charset_id, make_fixnum (id)); charset.id = id; - charset_table[id] = charset; - ASET (charset_attributes_table, id, attrs); - eassert (ASIZE (charset_attributes_table) == charset_table_size); + charset_table.start[id] = charset; + ASET (charset_table.attributes_table, id, attrs); + eassert (ASIZE (charset_table.attributes_table) == charset_table.size); if (charset.method == CHARSET_METHOD_MAP) { load_charset (&charset, 0); - charset_table[id] = charset; + charset_table.start[id] = charset; } if (charset.iso_final >= 0) @@ -1566,7 +1558,7 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) from_byte = CHAR_TO_BYTE (from); - charsets = make_nil_vector (charset_table_used); + charsets = make_nil_vector (charset_table.used); while (1) { find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from, @@ -1582,9 +1574,9 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) } val = Qnil; - for (i = charset_table_used - 1; i >= 0; i--) + for (i = charset_table.used - 1; i >= 0; i--) if (!NILP (AREF (charsets, i))) - val = Fcons (CHARSET_NAME (charset_table + i), val); + val = Fcons (CHARSET_NAME (charset_table.start + i), val); return val; } @@ -1599,14 +1591,14 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) { CHECK_STRING (str); - Lisp_Object charsets = make_nil_vector (charset_table_used); + Lisp_Object charsets = make_nil_vector (charset_table.used); find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str), charsets, table, STRING_MULTIBYTE (str)); Lisp_Object val = Qnil; - for (int i = charset_table_used - 1; i >= 0; i--) + for (int i = charset_table.used - 1; i >= 0; i--) if (!NILP (AREF (charsets, i))) - val = Fcons (CHARSET_NAME (charset_table + i), val); + val = Fcons (CHARSET_NAME (charset_table.start + i), val); return val; } @@ -2117,28 +2109,29 @@ DIMENSION, CHARS, and FINAL-CHAR. */) return (id >= 0 ? 
CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil); } -/* Shrink charset_table to charset_table_used. */ +/* Shrink charset_table to charset_table.used. */ static void shrink_charset_table (void) { - eassert (charset_table_size >= charset_table_used); - eassert (ASIZE (charset_attributes_table) == charset_table_size); + eassert (charset_table.size >= charset_table.used); + eassert (ASIZE (charset_table.attributes_table) + == charset_table.size); - struct charset *old = charset_table; - size_t nbytes = charset_table_used * sizeof *old; + struct charset *old = charset_table.start; + size_t nbytes = charset_table.used * sizeof *old; struct charset *new = xmalloc (nbytes); memcpy (new, old, nbytes); - charset_table = new; + charset_table.start = new; xfree (old); - Lisp_Object new_attr_table = make_vector (charset_table_used, Qnil); - for (size_t i = 0; i < charset_table_used; i++) - ASET (new_attr_table, i, AREF (charset_attributes_table, i)); - charset_attributes_table = new_attr_table; + Lisp_Object new_attr_table = make_vector (charset_table.used, Qnil); + for (size_t i = 0; i < charset_table.used; i++) + ASET (new_attr_table, i, + AREF (charset_table.attributes_table, i)); + charset_table.attributes_table = new_attr_table; - charset_table_size = charset_table_used; - - eassert (ASIZE (charset_attributes_table) == charset_table_size); + charset_table.size = charset_table.used; + eassert (ASIZE (charset_table.attributes_table) == charset_table.size); } DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps, @@ -2400,16 +2393,17 @@ syms_of_charset (void) staticpro (&Vcharset_hash_table); Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq); - charset_table_size = CHARSET_TABLE_INIT_SIZE; - PDUMPER_REMEMBER_SCALAR (charset_table_size); - charset_table - = xmalloc (charset_table_size * sizeof *charset_table); - charset_table_used = 0; - PDUMPER_REMEMBER_SCALAR (charset_table_used); + charset_table.size = CHARSET_TABLE_INIT_SIZE; + PDUMPER_REMEMBER_SCALAR 
(charset_table.size); + charset_table.start + = xmalloc (charset_table.size * sizeof *charset_table.start); + charset_table.used = 0; + PDUMPER_REMEMBER_SCALAR (charset_table.used); - charset_attributes_table = make_vector (charset_table_size, Qnil); - staticpro (&charset_attributes_table); + charset_table.attributes_table + = make_vector (charset_table.size, Qnil); + staticpro (&charset_table.attributes_table); defsubr (&Scharsetp); defsubr (&Smap_charset_chars); defsubr (&Sdefine_charset_internal); diff --git a/src/charset.h b/src/charset.h index 7cd2f8b70cd..f7cd08e3a88 100644 --- a/src/charset.h +++ b/src/charset.h @@ -243,14 +243,28 @@ struct charset vectors. */ extern Lisp_Object Vcharset_hash_table; -/* Table of struct charset. */ -extern struct charset *charset_table; -extern int charset_table_size; -extern int charset_table_used; +/* A charset_table is an array of struct charset along with a + Lisp_Vector of charset attributes. -extern Lisp_Object charset_attributes_table; + The charset_table.start field either points to xmalloced memory or to + the dump (i.e. pdumper_object_p (charset_table.start) can be true). -#define CHARSET_FROM_ID(id) (charset_table + (id)) + charset_table.attributes_table[id] contains the attribute vector for + the charset at charset_table.start[id]. + + We keep the attributes in a separate vector because that is + convenient for the GC. (We probably need to revise this decision, if + we ever expose struct charset as a Lisp level type.) 
*/ +struct charset_table +{ + struct charset *start; + unsigned size, used; + Lisp_Object attributes_table; +}; + +extern struct charset_table charset_table; + +#define CHARSET_FROM_ID(id) (charset_table.start + (id)) extern Lisp_Object Vcharset_ordered_list; extern Lisp_Object Vcharset_non_preferred_head; @@ -290,8 +304,8 @@ extern int emacs_mule_charset[256]; INLINE Lisp_Object charset_attributes_getter (struct charset *charset) { - eassert (ASIZE (charset_attributes_table) == charset_table_size); - Lisp_Object attrs = AREF (charset_attributes_table, charset->id); + eassert (ASIZE (charset_table.attributes_table) == charset_table.size); + Lisp_Object attrs = AREF (charset_table.attributes_table, charset->id); eassert (XFIXNUM (CHARSET_ATTR_ID (attrs)) == charset->id); return attrs; } diff --git a/src/pdumper.c b/src/pdumper.c index 6461ec170d0..b0f40c6e3ce 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -3214,10 +3214,10 @@ dump_charset (struct dump_context *ctx, int cs_i) /* We can't change the alignment here, because ctx->offset is what will be used for the whole array. */ eassert (ctx->offset % alignof (struct charset) == 0); - const struct charset *cs = charset_table + cs_i; + const struct charset *cs = charset_table.start + cs_i; struct charset out; dump_object_start (ctx, &out, sizeof (out)); - if (cs_i < charset_table_used) /* Don't look at uninitialized data. */ + if (cs_i < charset_table.used) /* Don't look at uninitialized data. 
*/ { DUMP_FIELD_COPY (&out, cs, id); DUMP_FIELD_COPY (&out, cs, dimension); @@ -3244,7 +3244,7 @@ dump_charset (struct dump_context *ctx, int cs_i) DUMP_FIELD_COPY (&out, cs, code_offset); } dump_off offset = dump_object_finish (ctx, &out, sizeof (out)); - if (cs_i < charset_table_used && cs->code_space_mask) + if (cs_i < charset_table.used && cs->code_space_mask) dump_remember_cold_op (ctx, COLD_OP_CHARSET, Fcons (dump_off_to_lisp (cs_i), dump_off_to_lisp (offset))); @@ -3260,8 +3260,8 @@ dump_charset_table (struct dump_context *ctx) dump_off offset = ctx->offset; if (dump_set_referrer (ctx)) ctx->current_referrer = build_string ("charset_table"); - eassert (charset_table_size == charset_table_used); - for (int i = 0; i < charset_table_size; ++i) + eassert (charset_table.size == charset_table.used); + for (int i = 0; i < charset_table.size; ++i) dump_charset (ctx, i); dump_clear_referrer (ctx); dump_emacs_reloc_to_dump_ptr_raw (ctx, &charset_table, offset); @@ -3411,7 +3411,7 @@ dump_cold_charset (struct dump_context *ctx, Lisp_Object data) (ctx, cs_dump_offset + dump_offsetof (struct charset, code_space_mask), ctx->offset); - struct charset *cs = charset_table + cs_i; + struct charset *cs = charset_table.start + cs_i; dump_write (ctx, cs->code_space_mask, 256); }