* src/marker.c: Try and speed up byte<->char conversion with many markers.

When considering markers (to find a starting point for the conversion), typically one of the two bounds is nearby (coming from cached_(byte|char)pos) but the other is far (point-min or point-max), so change the exit condition so we stop as soon as *one* of the bounds is near. (BYTECHAR_DISTANCE_INITIAL, BYTECHAR_DISTANCE_INCREMENT): New constants. (buf_charpos_to_bytepos, buf_bytepos_to_charpos): Use them to try and reduce the number of markers we consider.
2026-02-16 17:24:23 +00:00 · 2018-03-26 09:01:30 -04:00 · 2018-03-26 09:01:30 -04:00 · b300052fb4
commit b300052fb4
parent 9c1176247b
1 changed files with 40 additions and 10 deletions
--- a/src/marker.c
+++ b/src/marker.c
@ -90,7 +90,7 @@ clear_charpos_cache (struct buffer *b)
 #define CONSIDER(CHARPOS, BYTEPOS)					\
 {									\
  ptrdiff_t this_charpos = (CHARPOS);					\
-  bool changed = 0;							\
+  bool changed = false;							\
 									\
  if (this_charpos == charpos)						\
    {									\
@ -105,14 +105,14 @@ clear_charpos_cache (struct buffer *b)
 	{								\
 	  best_above = this_charpos;					\
 	  best_above_byte = (BYTEPOS);					\
-	  changed = 1;							\
+	  changed = true;						\
 	}								\
    }									\
  else if (this_charpos > best_below)					\
    {									\
      best_below = this_charpos;					\
      best_below_byte = (BYTEPOS);					\
-      changed = 1;							\
+      changed = true;							\
    }									\
 									\
  if (changed)								\
@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x)
  CHECK_TYPE (MARKERP (x), Qmarkerp, x);
 }
 /* When converting bytes from/to chars, we look through the list of
   markers to try and find a good starting point (since markers keep
   track of both bytepos and charpos at the same time).
   But if there are many markers, it can take too much time to find a "good"
   marker from which to start.  Worse yet: if it takes a long time and we end
   up finding a nearby markers, we won't add a new marker to cache this
   result, so next time around we'll have to go through this same long list
   to (re)find this best marker.  So the further down the list of
   markers we go, the less demanding we are w.r.t what is a good marker.
   The previous code used INITIAL=50 and INCREMENT=0 and this lead to
   really poor performance when there are many markers.
   I haven't tried to tweak INITIAL, but experiments on my trusty Thinkpad
   T61 using various artificial test cases seem to suggest that INCREMENT=50
   might be "the best compromise": it significantly improved the
   worst case and it was rarely slower and never by much.
   The asymptotic behavior is still poor, tho, so in largish buffers with many
   overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck.  */
 #define BYTECHAR_DISTANCE_INITIAL 50
 #define BYTECHAR_DISTANCE_INCREMENT 50
 /* Return the byte position corresponding to CHARPOS in B.  */
 ptrdiff_t
@ -141,6 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
  struct Lisp_Marker *tail;
  ptrdiff_t best_above, best_above_byte;
  ptrdiff_t best_below, best_below_byte;
  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
  eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b));
@ -180,8 +203,11 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
      /* If we are down to a range of 50 chars,
 	 don't bother checking any other markers;
 	 scan the intervening chars directly now.  */
-      if (best_above - best_below < 50)
+      if (best_above - charpos < distance
          || charpos - best_below < distance)
 	break;
      else
        distance += BYTECHAR_DISTANCE_INCREMENT;
    }
  /* We get here if we did not exactly hit one of the known places.
@ -248,7 +274,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
 #define CONSIDER(BYTEPOS, CHARPOS)					\
 {									\
  ptrdiff_t this_bytepos = (BYTEPOS);					\
-  int changed = 0;							\
+  int changed = false;							\
 									\
  if (this_bytepos == bytepos)						\
    {									\
@ -263,14 +289,14 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
 	{								\
 	  best_above = (CHARPOS);					\
 	  best_above_byte = this_bytepos;				\
-	  changed = 1;							\
+	  changed = true;						\
 	}								\
    }									\
  else if (this_bytepos > best_below_byte)				\
    {									\
      best_below = (CHARPOS);						\
      best_below_byte = this_bytepos;					\
-      changed = 1;							\
+      changed = true;							\
    }									\
 									\
  if (changed)								\
@ -293,6 +319,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
  struct Lisp_Marker *tail;
  ptrdiff_t best_above, best_above_byte;
  ptrdiff_t best_below, best_below_byte;
  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
  eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b));
@ -323,8 +350,11 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
      /* If we are down to a range of 50 chars,
 	 don't bother checking any other markers;
 	 scan the intervening chars directly now.  */
-      if (best_above - best_below < 50)
+      if (best_above - bytepos < distance
          || bytepos - best_below < distance)
 	break;
      else
        distance += BYTECHAR_DISTANCE_INCREMENT;
    }
  /* We get here if we did not exactly hit one of the known places.
@ -744,8 +774,8 @@ count_markers (struct buffer *buf)
 ptrdiff_t
 verify_bytepos (ptrdiff_t charpos)
 {
-  ptrdiff_t below = 1;
+  ptrdiff_t below = BEG;
-  ptrdiff_t below_byte = 1;
+  ptrdiff_t below_byte = BYTE_BEG;
  while (below != charpos)
    {