Introduce a struct charset_table

The new struct bundles what used to be the global variables
charset_table, charset_table_size, charset_table_used, and
charset_attributes_table.  Grouping them in one struct should make it
clearer that these fields must be kept in sync.

* src/charset.h (struct charset_table): New struct.
(charset_attributes_getter): Adjust accordingly.
* src/charset.c (charset_table): Change type to struct charset_table.
(charset_table_size, charset_table_used, charset_attributes_table):
Move to the struct.
(Fdefine_charset_internal, Ffind_charset_region, Ffind_charset_string)
(shrink_charset_table, syms_of_charset): Adjust to struct charset_table.
* src/pdumper.c (dump_charset, dump_charset_table): Adjust to struct
charset_table.
This commit is contained in:
Helmut Eller 2026-02-27 18:32:12 +01:00
parent bbb4fc26e7
commit 32f9e21098
3 changed files with 72 additions and 64 deletions

View file

@ -60,16 +60,8 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
charset symbols, and values are vectors of charset attributes. */
Lisp_Object Vcharset_hash_table;
/* Table of struct charset. */
struct charset *charset_table;
int charset_table_size;
int charset_table_used;
/* Table of attribute vectors. charset_attributes_table[id] contains
the attribute vector for the charset at charset_table[id].
This is a separate vector to simplify GC. */
Lisp_Object charset_attributes_table;
/* The table of all charsets. */
struct charset_table charset_table;
/* Special charsets corresponding to symbols. */
int charset_ascii;
@ -1128,28 +1120,28 @@ usage: (define-charset-internal ...) */)
else
{
hash_put (hash_table, args[charset_arg_name], attrs, hash_code);
if (charset_table_used == charset_table_size)
if (charset_table.used == charset_table.size)
{
/* Ensure that charset IDs fit into 'int' as well as into the
restriction imposed by fixnums. Although the 'int' restriction
could be removed, too much other code would need altering; for
example, the IDs are stuffed into struct
coding_system.charbuf[i] entries, which are 'int'. */
int old_size = charset_table_size;
int old_size = charset_table.size;
ptrdiff_t new_size = old_size;
struct charset *new_table
= xpalloc (0, &new_size, 1,
min (INT_MAX, MOST_POSITIVE_FIXNUM),
sizeof *charset_table);
memcpy (new_table, charset_table,
sizeof *charset_table.start);
memcpy (new_table, charset_table.start,
old_size * sizeof *new_table);
charset_table = new_table;
charset_table_size = new_size;
charset_table.start = new_table;
charset_table.size = new_size;
Lisp_Object new_attr_table = make_vector (new_size, Qnil);
for (size_t i = 0; i < old_size; i++)
ASET (new_attr_table, i,
AREF (charset_attributes_table, i));
charset_attributes_table = new_attr_table;
AREF (charset_table.attributes_table, i));
charset_table.attributes_table = new_attr_table;
/* FIXME: This leaks memory, as the old charset_table becomes
unreachable. If the old charset table is charset_table_init
then this leak is intentional; otherwise, it's unclear.
@ -1158,20 +1150,20 @@ usage: (define-charset-internal ...) */)
charset_table should be freed, by passing it as the 1st argument
to xpalloc and removing the memcpy. */
}
id = charset_table_used++;
id = charset_table.used++;
new_definition_p = 1;
}
ASET (attrs, charset_id, make_fixnum (id));
charset.id = id;
charset_table[id] = charset;
ASET (charset_attributes_table, id, attrs);
eassert (ASIZE (charset_attributes_table) == charset_table_size);
charset_table.start[id] = charset;
ASET (charset_table.attributes_table, id, attrs);
eassert (ASIZE (charset_table.attributes_table) == charset_table.size);
if (charset.method == CHARSET_METHOD_MAP)
{
load_charset (&charset, 0);
charset_table[id] = charset;
charset_table.start[id] = charset;
}
if (charset.iso_final >= 0)
@ -1566,7 +1558,7 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
from_byte = CHAR_TO_BYTE (from);
charsets = make_nil_vector (charset_table_used);
charsets = make_nil_vector (charset_table.used);
while (1)
{
find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
@ -1582,9 +1574,9 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
}
val = Qnil;
for (i = charset_table_used - 1; i >= 0; i--)
for (i = charset_table.used - 1; i >= 0; i--)
if (!NILP (AREF (charsets, i)))
val = Fcons (CHARSET_NAME (charset_table + i), val);
val = Fcons (CHARSET_NAME (charset_table.start + i), val);
return val;
}
@ -1599,14 +1591,14 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
{
CHECK_STRING (str);
Lisp_Object charsets = make_nil_vector (charset_table_used);
Lisp_Object charsets = make_nil_vector (charset_table.used);
find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
charsets, table,
STRING_MULTIBYTE (str));
Lisp_Object val = Qnil;
for (int i = charset_table_used - 1; i >= 0; i--)
for (int i = charset_table.used - 1; i >= 0; i--)
if (!NILP (AREF (charsets, i)))
val = Fcons (CHARSET_NAME (charset_table + i), val);
val = Fcons (CHARSET_NAME (charset_table.start + i), val);
return val;
}
@ -2117,28 +2109,29 @@ DIMENSION, CHARS, and FINAL-CHAR. */)
return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
}
/* Shrink charset_table to charset_table_used. */
/* Shrink charset_table to charset_table.used. */
static void
shrink_charset_table (void)
{
eassert (charset_table_size >= charset_table_used);
eassert (ASIZE (charset_attributes_table) == charset_table_size);
eassert (charset_table.size >= charset_table.used);
eassert (ASIZE (charset_table.attributes_table)
== charset_table.size);
struct charset *old = charset_table;
size_t nbytes = charset_table_used * sizeof *old;
struct charset *old = charset_table.start;
size_t nbytes = charset_table.used * sizeof *old;
struct charset *new = xmalloc (nbytes);
memcpy (new, old, nbytes);
charset_table = new;
charset_table.start = new;
xfree (old);
Lisp_Object new_attr_table = make_vector (charset_table_used, Qnil);
for (size_t i = 0; i < charset_table_used; i++)
ASET (new_attr_table, i, AREF (charset_attributes_table, i));
charset_attributes_table = new_attr_table;
Lisp_Object new_attr_table = make_vector (charset_table.used, Qnil);
for (size_t i = 0; i < charset_table.used; i++)
ASET (new_attr_table, i,
AREF (charset_table.attributes_table, i));
charset_table.attributes_table = new_attr_table;
charset_table_size = charset_table_used;
eassert (ASIZE (charset_attributes_table) == charset_table_size);
charset_table.size = charset_table.used;
eassert (ASIZE (charset_table.attributes_table) == charset_table.size);
}
DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
@ -2400,16 +2393,17 @@ syms_of_charset (void)
staticpro (&Vcharset_hash_table);
Vcharset_hash_table = CALLN (Fmake_hash_table, QCtest, Qeq);
charset_table_size = CHARSET_TABLE_INIT_SIZE;
PDUMPER_REMEMBER_SCALAR (charset_table_size);
charset_table
= xmalloc (charset_table_size * sizeof *charset_table);
charset_table_used = 0;
PDUMPER_REMEMBER_SCALAR (charset_table_used);
charset_table.size = CHARSET_TABLE_INIT_SIZE;
PDUMPER_REMEMBER_SCALAR (charset_table.size);
charset_table.start
= xmalloc (charset_table.size * sizeof *charset_table.start);
charset_table.used = 0;
PDUMPER_REMEMBER_SCALAR (charset_table.used);
charset_attributes_table = make_vector (charset_table_size, Qnil);
staticpro (&charset_attributes_table);
charset_table.attributes_table
= make_vector (charset_table.size, Qnil);
staticpro (&charset_table.attributes_table);
defsubr (&Scharsetp);
defsubr (&Smap_charset_chars);
defsubr (&Sdefine_charset_internal);

View file

@ -243,14 +243,28 @@ struct charset
vectors. */
extern Lisp_Object Vcharset_hash_table;
/* Table of struct charset. */
extern struct charset *charset_table;
extern int charset_table_size;
extern int charset_table_used;
/* A charset_table is an array of struct charset along with a
Lisp_Vector of charset attributes.
extern Lisp_Object charset_attributes_table;
The charset_table.start field either points to xmalloced memory or to
the dump (i.e. pdumper_object_p (charset_table.start) can be true).
#define CHARSET_FROM_ID(id) (charset_table + (id))
charset_table.attributes_table[id] contains the attribute vector for
the charset at charset_table.start[id].
We keep the attributes in a separate vector because that is
convenient for the GC.  (We will probably need to revise this decision
if we ever expose struct charset as a Lisp-level type.)  */
struct charset_table
{
  /* Array of charset definitions, indexed by charset ID.
     NOTE(review): the dumper call site passes &charset_table itself
     when relocating this pointer, which presumably relies on START
     being the first member -- confirm before reordering fields.  */
  struct charset *start;

  /* Number of elements allocated in START; the code asserts that this
     equals ASIZE (attributes_table).
     NOTE(review): unsigned, yet some loops compare it against a
     signed int index.  */
  unsigned size;

  /* Number of entries of START currently in use; the next new charset
     receives this value as its ID.  Asserted to be <= SIZE.  */
  unsigned used;

  /* Lisp vector holding, at index ID, the attribute vector of the
     charset stored at START[ID].  */
  Lisp_Object attributes_table;
};
extern struct charset_table charset_table;
#define CHARSET_FROM_ID(id) (charset_table.start + (id))
extern Lisp_Object Vcharset_ordered_list;
extern Lisp_Object Vcharset_non_preferred_head;
@ -290,8 +304,8 @@ extern int emacs_mule_charset[256];
INLINE Lisp_Object
charset_attributes_getter (struct charset *charset)
{
eassert (ASIZE (charset_attributes_table) == charset_table_size);
Lisp_Object attrs = AREF (charset_attributes_table, charset->id);
eassert (ASIZE (charset_table.attributes_table) == charset_table.size);
Lisp_Object attrs = AREF (charset_table.attributes_table, charset->id);
eassert (XFIXNUM (CHARSET_ATTR_ID (attrs)) == charset->id);
return attrs;
}

View file

@ -3214,10 +3214,10 @@ dump_charset (struct dump_context *ctx, int cs_i)
/* We can't change the alignment here, because ctx->offset is what
will be used for the whole array. */
eassert (ctx->offset % alignof (struct charset) == 0);
const struct charset *cs = charset_table + cs_i;
const struct charset *cs = charset_table.start + cs_i;
struct charset out;
dump_object_start (ctx, &out, sizeof (out));
if (cs_i < charset_table_used) /* Don't look at uninitialized data. */
if (cs_i < charset_table.used) /* Don't look at uninitialized data. */
{
DUMP_FIELD_COPY (&out, cs, id);
DUMP_FIELD_COPY (&out, cs, dimension);
@ -3244,7 +3244,7 @@ dump_charset (struct dump_context *ctx, int cs_i)
DUMP_FIELD_COPY (&out, cs, code_offset);
}
dump_off offset = dump_object_finish (ctx, &out, sizeof (out));
if (cs_i < charset_table_used && cs->code_space_mask)
if (cs_i < charset_table.used && cs->code_space_mask)
dump_remember_cold_op (ctx, COLD_OP_CHARSET,
Fcons (dump_off_to_lisp (cs_i),
dump_off_to_lisp (offset)));
@ -3260,8 +3260,8 @@ dump_charset_table (struct dump_context *ctx)
dump_off offset = ctx->offset;
if (dump_set_referrer (ctx))
ctx->current_referrer = build_string ("charset_table");
eassert (charset_table_size == charset_table_used);
for (int i = 0; i < charset_table_size; ++i)
eassert (charset_table.size == charset_table.used);
for (int i = 0; i < charset_table.size; ++i)
dump_charset (ctx, i);
dump_clear_referrer (ctx);
dump_emacs_reloc_to_dump_ptr_raw (ctx, &charset_table, offset);
@ -3411,7 +3411,7 @@ dump_cold_charset (struct dump_context *ctx, Lisp_Object data)
(ctx,
cs_dump_offset + dump_offsetof (struct charset, code_space_mask),
ctx->offset);
struct charset *cs = charset_table + cs_i;
struct charset *cs = charset_table.start + cs_i;
dump_write (ctx, cs->code_space_mask, 256);
}