bug-texinfo
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RFC: Multibyte support in the info viewer


From: Miloslav Trmac
Subject: RFC: Multibyte support in the info viewer
Date: Tue, 30 Jan 2007 05:23:41 +0100
User-agent: Thunderbird 1.5.0.9 (X11/20061219)

Hello,
the attached patch adds multibyte display, cursor movement and editing
support to the info viewer.  Multibyte character entry and completions
are not really supported.

The patch uses the mbchar, mbiter and mbuiter modules from gnulib;
unfortunately these modules require mbrtowc () and have no fallback.  I
guess this requirement is not acceptable for texinfo, is it?

I'd be grateful for any comments about the patch.  If the approach is
reasonable, I'll add the necessary fallbacks to gnulib and resend the
patch once it is tested with the new gnulib code.

Thanks,
        Mirek
Index: configure.ac
===================================================================
RCS file: /sources/texinfo/texinfo/configure.ac,v
retrieving revision 1.67
diff -u -r1.67 configure.ac
--- configure.ac        27 Dec 2006 16:20:17 -0000      1.67
+++ configure.ac        30 Jan 2007 04:09:17 -0000
@@ -81,7 +81,7 @@
 # in theory only pre-sysvr3 systems needed this and it's not likely
 # that anyone compiling new texinfo still has such a thing? we'll see.
 # AC_FUNC_SETVBUF_REVERSED
-AC_CHECK_FUNCS(bzero getcwd memset setvbuf sigaction sigprocmask \
+AC_CHECK_FUNCS(bzero getcwd memset setlocale setvbuf sigaction sigprocmask \
                sigsetmask strchr wcwidth)
 AC_REPLACE_FUNCS(memcpy memmove strdup strerror)
 
Index: info/display.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/display.c,v
retrieving revision 1.9
diff -u -r1.9 display.c
--- info/display.c      9 Jan 2006 01:49:25 -0000       1.9
+++ info/display.c      30 Jan 2007 04:09:17 -0000
@@ -19,7 +19,10 @@
 
    Originally written by Brian Fox (address@hidden). */
 
+
 #include "info.h"
+#include "mbiter.h"
+#include "mbswidth.h"
 #include "display.h"
 
 extern int info_any_buffered_input_p (void); /* Found in session.c. */
@@ -122,14 +125,14 @@
 void
 display_update_one_window (WINDOW *win)
 {
-  register char *nodetext;      /* Current character to display. */
+  mbi_iterator_t nodetext;     /* Current character to display. */
   register char *last_node_char; /* Position of the last character in node. */
   register int i;               /* General use index. */
   char *printed_line;           /* Buffer for a printed line. */
   int pl_index = 0;             /* Index into PRINTED_LINE. */
   int line_index = 0;           /* Number of lines done so far. */
-  int pl_ignore = 0;           /* How many chars use zero width on screen. */
-  int allocated_win_width;
+  int pl_ignore = 0; /* How many non-control chars use zero width on screen. */
+  int allocated_win_width, hpos;
   DISPLAY_LINE **display = the_display;
 
   /* If display is inhibited, that counts as an interrupted display. */
@@ -156,54 +159,53 @@
   if (!win->node || !win->line_starts)
     goto done_with_node_display;
 
-  nodetext = win->line_starts[win->pagetop];
   last_node_char = win->node->contents + win->node->nodelen;
+  mbi_init (nodetext, win->line_starts[win->pagetop],
+           last_node_char - win->line_starts[win->pagetop]);
+  hpos = 0;
 
-  for (; nodetext < last_node_char; nodetext++)
+  for (; mbi_avail (nodetext); mbi_advance (nodetext))
     {
-      char *rep = NULL, *rep_carried_over, rep_temp[2];
-      int replen;
+      const char *rep = NULL;
+      int rep_carried_over;
+      int replen; /* 0 to force line termination */
+      int cwidth;
 
-      if (isprint (*nodetext))
+      if (mb_isprint (mbi_cur (nodetext)))
         {
-          rep_temp[0] = *nodetext;
-          replen = 1;
-          rep_temp[1] = '\0';
-          rep = rep_temp;
+         rep = mbi_cur_ptr (nodetext);
+         replen = mb_len (mbi_cur (nodetext));
         }
       else
         {
-          if (*nodetext == '\r' || *nodetext == '\n')
-            {
-              replen = win->width - pl_index + pl_ignore;
-            }
-         else if (*nodetext == '\0'
-                  && (nodetext + 2) < last_node_char
-                  && *(nodetext + 1) == '\b'
-                  && *(nodetext + 2) == '[')
+          if (mb_iseq (mbi_cur (nodetext), '\r')
+             || mb_iseq (mbi_cur (nodetext), '\n'))
+           replen = 0;
+         else if (mb_iseq (mbi_cur (nodetext), '\0')
+                  && (mbi_cur_ptr (nodetext) + 2) < last_node_char
+                  && *(mbi_cur_ptr (nodetext) + 1) == '\b'
+                  && *(mbi_cur_ptr (nodetext) + 2) == '[')
            {
              /* Found new style tag/cookie \0\b[
                 Read until the closing tag \0\b] */
              int element_len = 0;
              char *element;
+             const char *p;
 
              /* Skip the escapes.  */
-             nodetext += 3;
+             p = mbi_cur_ptr (nodetext) + 3;
 
-             while (!(*nodetext == '\0'
-                   && *(nodetext + 1) == '\b'
-                   && *(nodetext + 2) == ']'))
+             while (!(*p == '\0' && *(p + 1) == '\b' && *(p + 2) == ']'))
                {
-                 nodetext++;
+                 p++;
                  element_len++;
                }
 
              element = (char *) malloc (element_len + 1);
-             strncpy (element, nodetext - element_len, element_len);
+             strncpy (element, p - element_len, element_len);
 
              /* Skip the escapes.  */
-             nodetext += 2;
-             pl_ignore += element_len + 5;
+             p += 2;
              /* Append string terminator.  */
              element[element_len] = '\0';
 
@@ -212,27 +214,30 @@
              /* Over and out */
              free (element);
 
+             mbi_init (nodetext, p, last_node_char - p);
              continue;
            }
           else
             {
-              rep = printed_representation (*nodetext, pl_index);
+              rep = printed_representation (&mbi_cur (nodetext),
+                                           hpos - pl_ignore);
               replen = strlen (rep);
             }
         }
 
       /* Support ANSI escape sequences under -R.  */
       if (raw_escapes_p
-         && *nodetext == '\033'
-         && nodetext[1] == '['
-         && isdigit (nodetext[2]))
+         && mb_iseq (mbi_cur (nodetext), '\033')
+         && mbi_cur_ptr (nodetext)[1] == '['
+         && isdigit (mbi_cur_ptr (nodetext)[2]))
        {
-         if (nodetext[3] == 'm')
+         if (mbi_cur_ptr (nodetext)[3] == 'm')
+           pl_ignore += 3;
+         else if (isdigit (mbi_cur_ptr (nodetext)[3])
+                  && mbi_cur_ptr (nodetext)[4] == 'm')
            pl_ignore += 4;
-         else if (isdigit (nodetext[3]) && nodetext[4] == 'm')
-           pl_ignore += 5;
        }
-      while (pl_index + 2 >= allocated_win_width - 1)
+      while (pl_index + replen + 1 >= allocated_win_width - 1)
        {
          allocated_win_width *= 2;
          printed_line = (char *)xrealloc (printed_line, allocated_win_width);
@@ -240,7 +245,8 @@
 
       /* If this character can be printed without passing the width of
          the line, then stuff it into the line. */
-      if (replen + pl_index < win->width + pl_ignore)
+      cwidth = mbsnwidth (rep, replen, 0);
+      if (replen != 0 && hpos + cwidth < win->width + pl_ignore)
         {
           /* Optimize if possible. */
           if (replen == 1)
@@ -252,6 +258,7 @@
               for (i = 0; i < replen; i++)
                 printed_line[pl_index++] = rep[i];
             }
+         hpos += cwidth;
         }
       else
         {
@@ -260,21 +267,34 @@
           /* If this character cannot be printed in this line, we have
              found the end of this line as it would appear on the screen.
              Carefully print the end of the line, and then compare. */
-          if (*nodetext == '\n' || *nodetext == '\r' || *nodetext == '\t')
+          if (mb_iseq (mbi_cur (nodetext), '\n')
+             || mb_iseq (mbi_cur (nodetext), '\r')
+             || mb_iseq (mbi_cur (nodetext), '\t'))
             {
               printed_line[pl_index] = '\0';
-              rep_carried_over = (char *)NULL;
+              rep_carried_over = 0;
             }
           else
             {
+             mbi_iterator_t rep_it;
+
               /* The printed representation of this character extends into
-                 the next line.  Remember the offset of the last character
-                 printed out of REP so that we can carry the character over
-                 to the next line. */
-              for (i = 0; pl_index < (win->width + pl_ignore - 1);)
-                printed_line[pl_index++] = rep[i++];
+                 the next line. */
+             for (mbi_init (rep_it, rep, replen); mbi_avail (rep_it);
+                  mbi_advance (rep_it))
+               {
+                 int width;
 
-              rep_carried_over = rep + i;
+                 width = mb_width (mbi_cur (rep_it));
+                 if (hpos + width > win->width + pl_ignore - 1)
+                   break;
+                 memcpy (printed_line + pl_index, mbi_cur_ptr (rep_it),
+                         mb_len (mbi_cur (rep_it)));
+                 pl_index += mb_len (mbi_cur (rep_it));
+                 hpos += width;
+               }
+
+              rep_carried_over = 1;
 
               /* If printing the last character in this window couldn't
                  possibly cause the screen to scroll, place a backslash
@@ -285,6 +305,7 @@
                     printed_line[pl_index++] = '$';
                   else
                     printed_line[pl_index++] = '\\';
+                 hpos++;
                 }
               printed_line[pl_index] = '\0';
             }
@@ -320,8 +341,12 @@
                  at all, we must do some redrawing. */
               if ((i != pl_index) || (pl_index != entry->textlen))
                 {
-                  /* Move to the proper point on the terminal. */
-                  terminal_goto_xy (i, line_index + win->first_row);
+                  /* Move to the proper point on the terminal.  If entry->text
+                    contained an escape sequence, it was cleared above, so
+                    there is no escape sequence in printed_line[0..i) and it
+                    is not necessary to handle pl_ignore. */
+                  terminal_goto_xy (mbsnwidth (printed_line, i, 0),
+                                   line_index + win->first_row);
 
                   /* If there is any text to print, print it. */
                   if (i != pl_index)
@@ -330,8 +355,8 @@
                   /* If the printed text didn't extend all the way to the edge
                      of the window, and text was appearing between here and the
                      edge of the window, clear from here to the end of the line. */
-                  if ((pl_index < win->width + pl_ignore
-                      && pl_index < entry->textlen)
+                  if ((hpos < win->width + pl_ignore
+                      && hpos < mbsnwidth (entry->text, entry->textlen, 0))
                      || (entry->inverse))
                     terminal_clear_to_eol ();
 
@@ -340,11 +365,13 @@
                   /* Update the display text buffer. */
                  if (strlen (printed_line) > (unsigned int) screenwidth)
                /*     printed_line[] can include more than screenwidth
-                      characters if we are under -R and there are escape
-                      sequences in it.  However, entry->text was
-                      allocated (in display_initialize_display) for
-                      screenwidth characters only.  */
-               entry->text     = xrealloc (entry->text, strlen (printed_line)+1);
+                      characters if it contains multibyte characters or we are
+                      under -R and there are escape sequences in it.  However,
+                      entry->text was allocated (in
+                      display_initialize_display) for screenwidth characters
+                      only.  */
+                   entry->text = xrealloc (entry->text,
+                                           strlen (printed_line)+1);
                   strcpy (entry->text + i, printed_line + i);
                   entry->textlen = pl_index;
 
@@ -371,27 +398,29 @@
 
           /* Reset PL_INDEX to the start of the line. */
           pl_index = 0;
+         hpos = 0;
          pl_ignore = 0;        /* this is computed per line */
 
           /* If there are characters from REP left to print, stuff them
              into the buffer now. */
           if (rep_carried_over)
-            for (; rep[pl_index]; pl_index++)
-              printed_line[pl_index] = rep[pl_index];
+           {
+             for (; pl_index < replen; pl_index++)
+               printed_line[pl_index] = rep[pl_index];
+             hpos = mbsnwidth (printed_line, pl_index, 0);
+           }
 
           /* If this window has chosen not to wrap lines, skip to the end
              of the physical line in the buffer, and start a new line here. */
           if (pl_index && (win->flags & W_NoWrap))
             {
-              char *begin;
-
               pl_index = 0;
+             hpos = 0;
               printed_line[0] = '\0';
 
-              begin = nodetext;
-
-              while ((nodetext < last_node_char) && (*nodetext != '\n'))
-                nodetext++;
+              while (mbi_avail (nodetext)
+                    && !mb_iseq (mbi_cur (nodetext), '\n'))
+                mbi_advance (nodetext);
             }
         }
     }
Index: info/echo-area.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/echo-area.c,v
retrieving revision 1.8
diff -u -r1.8 echo-area.c
--- info/echo-area.c    15 May 2005 00:00:06 -0000      1.8
+++ info/echo-area.c    30 Jan 2007 04:09:17 -0000
@@ -21,6 +21,10 @@
    Written by Brian Fox (address@hidden). */
 
 #include "info.h"
+#include "mbiter.h"
+#include "mbswidth.h"
+#include "mbuiter.h"
+#include "xalloc.h"
 
 #if defined (FD_SET)
 #  if defined (hpux)
@@ -287,21 +291,42 @@
     ea_backward (window, -count, key);
   else
     {
-      input_line_point += count;
-      if (input_line_point > input_line_end)
-        input_line_point = input_line_end;
+      mbi_iterator_t it;
+
+      for (mbi_init (it, input_line + input_line_point,
+                    input_line_end - input_line_point);
+          mbi_avail (it) && count != 0; mbi_advance (it), count--)
+       ;
+      input_line_point = mbi_cur_ptr (it) - input_line;
     }
 }
 
 DECLARE_INFO_COMMAND (ea_backward, _("Move backward a character"))
 {
-  if (count < 0)
+  if (count <= 0)
     ea_forward (window, -count, key);
   else
     {
-      input_line_point -= count;
-      if (input_line_point < input_line_beg)
-        input_line_point = input_line_beg;
+      const char **last_n;
+      mbi_iterator_t it;
+      size_t i;
+
+      last_n = XCALLOC (count, const char *);
+      i = 0;
+      /* last_n[i, (i + 1) % count, ..., count - 1] point to the previous
+        COUNT characters, or NULL if there is no such character. */
+      for (mbi_init (it, input_line + input_line_beg,
+                    input_line_point - input_line_beg); mbi_avail (it);
+          mbi_advance (it))
+       {
+         last_n[i] = mbi_cur_ptr (it);
+         i = (i + 1) % count;
+       }
+      if (last_n[i] != NULL)
+       input_line_point = last_n[i] - input_line;
+      else
+       input_line_point = input_line_beg;
+      free (last_n);
     }
 }
 
@@ -315,93 +340,101 @@
   input_line_point = input_line_end;
 }
 
-#define alphabetic(c) (islower (c) || isupper (c) || isdigit (c))
+#define alphabetic(c) (mb_isalpha (c) || mb_isdigit (c))
 
 /* Move forward a word in the input line. */
 DECLARE_INFO_COMMAND (ea_forward_word, _("Move forward a word"))
 {
-  int c;
-
   if (count < 0)
     ea_backward_word (window, -count, key);
   else
     {
+      mbi_iterator_t it;
+
+      mbi_init (it, input_line + input_line_point,
+               input_line_end - input_line_point);
+
       while (count--)
         {
-          if (input_line_point == input_line_end)
-            return;
+          if (!mbi_avail (it))
+           break;
 
           /* If we are not in a word, move forward until we are in one.
              Then, move forward until we hit a non-alphabetic character. */
-          c = input_line[input_line_point];
-
-          if (!alphabetic (c))
+          if (!alphabetic (mbi_cur (it)))
             {
-              while (++input_line_point < input_line_end)
-                {
-                  c = input_line[input_line_point];
-                  if (alphabetic (c))
-                    break;
-                }
+             do
+               mbi_advance (it);
+              while (mbi_avail (it) && !alphabetic (mbi_cur (it)));
             }
 
-          if (input_line_point == input_line_end)
-            return;
+          if (!mbi_avail (it))
+           break;
 
-          while (++input_line_point < input_line_end)
-            {
-              c = input_line[input_line_point];
-              if (!alphabetic (c))
-                break;
-            }
+         do
+           mbi_advance (it);
+          while (mbi_avail (it) && alphabetic (mbi_cur (it)));
         }
+      input_line_point = mbi_cur_ptr (it) - input_line;
     }
 }
 
 DECLARE_INFO_COMMAND (ea_backward_word, _("Move backward a word"))
 {
-  int c;
-
-  if (count < 0)
+  if (count <= 0)
     ea_forward_word (window, -count, key);
   else
     {
+      mbi_iterator_t it;
+      const char **chars;
+      size_t i;
+
+      chars = XNMALLOC (input_line_point - input_line_beg + 1, const char *);
+      i = 0;
+      for (mbi_init (it, input_line + input_line_beg,
+                    input_line_point - input_line_beg); mbi_avail (it);
+          mbi_advance (it))
+       chars[i++] = mbi_cur_ptr (it);
+      chars[i] = mbi_cur_ptr (it);
       while (count--)
         {
-          if (input_line_point == input_line_beg)
-            return;
+          if (i == 0)
+            break;
 
           /* Like ea_forward_word (), except that we look at the
              characters just before point. */
 
-          c = input_line[input_line_point - 1];
+         mbi_init (it, chars[i - 1], chars[i] - chars[i - 1]);
+         mbi_avail (it);
 
-          if (!alphabetic (c))
+          if (!alphabetic (mbi_cur (it)))
             {
-              while ((--input_line_point) != input_line_beg)
+              while (--i != 0)
                 {
-                  c = input_line[input_line_point - 1];
-                  if (alphabetic (c))
+                 mbi_init (it, chars[i - 1], chars[i] - chars[i - 1]);
+                 mbi_avail (it);
+                  if (alphabetic (mbi_cur (it)))
                     break;
                 }
             }
 
-          while (input_line_point != input_line_beg)
+          while (i != 0)
             {
-              c = input_line[input_line_point - 1];
-              if (!alphabetic (c))
+             mbi_init (it, chars[i - 1], chars[i] - chars[i - 1]);
+             mbi_avail (it);
+              if (!alphabetic (mbi_cur (it)))
                 break;
               else
-                --input_line_point;
+                --i;
             }
         }
+      input_line_point = chars[i] - input_line;
+      free (chars);
     }
 }
 
 DECLARE_INFO_COMMAND (ea_delete, _("Delete the character under the cursor"))
 {
-  register int i;
-
   if (count < 0)
     ea_rubout (window, -count, key);
   else
@@ -420,10 +453,18 @@
         }
       else
         {
-          for (i = input_line_point; i < input_line_end; i++)
-            input_line[i] = input_line[i + 1];
+         mbi_iterator_t it;
+         size_t next_char;
 
-          input_line_end--;
+         mbi_init (it, input_line + input_line_point,
+                   input_line_end - input_line_point);
+         mbi_avail (it);
+         mbi_advance (it);
+         next_char = mbi_cur_ptr (it) - input_line;
+         memmove (input_line + input_line_point, input_line + next_char,
+                  (input_line_end + 1) - next_char);
+
+          input_line_end -= next_char - input_line_point;
         }
     }
 }
@@ -504,32 +545,63 @@
    then transpose the characters before point. */
 DECLARE_INFO_COMMAND (ea_transpose_chars, _("Transpose characters at point"))
 {
+  mbi_iterator_t it;
+  char **chars;
+  size_t num_chars, point;
+
   /* Handle conditions that would make it impossible to transpose
      characters. */
-  if (!count || !input_line_point || (input_line_end - input_line_beg) < 2)
+  if (!count || input_line_point == input_line_beg)
     return;
 
-  while (count)
+  chars = XNMALLOC (input_line_end - input_line_beg + 1, char *);
+  point = 0;
+  num_chars = 0;
+  for (mbi_init (it, input_line + input_line_beg,
+                input_line_end - input_line_beg); mbi_avail (it);
+       mbi_advance (it))
+    {
+      if (mbi_cur_ptr (it) == input_line + input_line_point)
+       point = num_chars;
+      chars[num_chars++] = (char *)mbi_cur_ptr (it);
+    }
+  if (mbi_cur_ptr (it) == input_line + input_line_point)
+    point = num_chars;
+  chars[num_chars] = (char *)mbi_cur_ptr (it);
+  if (num_chars < 2)
     {
-      int t;
-      if (input_line_point == input_line_end)
-        {
-          t = input_line[input_line_point - 1];
+      free (chars);
+      return;
+    }
 
-          input_line[input_line_point - 1] = input_line[input_line_point - 2];
-          input_line[input_line_point - 2] = t;
-        }
+  while (count)
+    {
+      char c1[MB_LEN_MAX], c2[MB_LEN_MAX];
+      size_t len1, len2;
+      size_t pos;
+
+      if (point == 0)
+       pos = point;
+      else if (point != num_chars)
+       pos = point - 1;
       else
-        {
-          t = input_line[input_line_point];
+       pos = point - 2;
 
-          input_line[input_line_point] = input_line[input_line_point - 1];
-          input_line[input_line_point - 1] = t;
+      len1 = chars[pos + 1] - chars[pos];
+      memcpy (c1, chars[pos], len1);
+      len2 = chars[pos + 2] - chars[pos + 1];
+      memcpy (c2, chars[pos + 1], len2);
+
+      memcpy (chars[pos], c2, len2);
+      chars[pos + 1] = chars[pos] + len2;
+      memcpy (chars[pos + 1], c1, len1);
 
-          if (count < 0 && input_line_point != input_line_beg)
-            input_line_point--;
+      if (point != num_chars)
+        {
+          if (count < 0 && point != 0)
+            point--;
           else
-            input_line_point++;
+            point++;
         }
 
       if (count < 0)
@@ -537,6 +609,8 @@
       else
         count--;
     }
+  input_line_point = chars[point] - input_line;
+  free (chars);
 }
 
 /* **************************************************************** */
@@ -942,7 +1016,7 @@
       /* Find the maximum length of a label. */
       for (i = 0; i < completions_found_index; i++)
         {
-          int len = strlen (completions_found[i]->label);
+          int len = mbswidth (completions_found[i]->label, 0);
           if (len > max_label)
             max_label = len;
         }
@@ -982,7 +1056,7 @@
                   int printed_length, k;
 
                   label = completions_found[l]->label;
-                  printed_length = strlen (label);
+                  printed_length = mbswidth (label, 0);
                   printf_to_message_buffer ("%s", label, NULL, NULL);
 
                   if (j + 1 < limit)
@@ -1167,7 +1241,9 @@
 }
 
 /* Build a list of possible completions from echo_area_completion_items,
-   and the contents of input_line. */
+   and the contents of input_line.
+
+   FIXME: this function is not multibyte-aware. */
 static void
 build_completions (void)
 {
Index: info/info-utils.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/info-utils.c,v
retrieving revision 1.5
diff -u -r1.5 info-utils.c
--- info/info-utils.c   15 May 2005 00:00:07 -0000      1.5
+++ info/info-utils.c   30 Jan 2007 04:09:17 -0000
@@ -20,6 +20,7 @@
    Originally written by Brian Fox (address@hidden). */
 
 #include "info.h"
+#include "mbchar.h"
 #include "info-utils.h"
 #if defined (HANDLE_MAN_PAGES)
 #  include "man.h"
@@ -477,26 +478,26 @@
 }
 
 /* String representation of a char returned by printed_representation (). */
-static char the_rep[10];
+static char the_rep[MB_LEN_MAX + 1 > 10 ? MB_LEN_MAX + 1 : 10];
 
 /* Return a pointer to a string which is the printed representation
-   of CHARACTER if it were printed at HPOS. */
+   of *CHARACTER if it were printed at HPOS. */
 char *
-printed_representation (unsigned char character, int hpos)
+printed_representation (const mbchar_t *character, int hpos)
 {
   register int i = 0;
-  int printable_limit = ISO_Latin_p ? 255 : 127;
 
-  if (raw_escapes_p && character == '\033')
-    the_rep[i++] = character;
+  if (raw_escapes_p && mb_iseq (*character, '\033'))
+    the_rep[i++] = '\033';
   /* Show CTRL-x as ^X.  */
-  else if (iscntrl (character) && character < 127)
+  else if (mb_iscntrl (*character)
+          && character->wc >= 0 && character->wc < 127)
     {
-      switch (character)
+      switch ((char)character->wc)
         {
         case '\r':
         case '\n':
-          the_rep[i++] = character;
+          the_rep[i++] = (char)character->wc;
           break;
 
         case '\t':
@@ -511,22 +512,25 @@
 
         default:
           the_rep[i++] = '^';
-          the_rep[i++] = (character | 0x40);
+          the_rep[i++] = ((char)character->wc | 0x40);
         }
     }
   /* Show META-x as 0370.  */
-  else if (character > printable_limit)
+  else if (!ISO_Latin_p && character->wc >= 0 && character->wc > 127)
     {
-      sprintf (the_rep + i, "\\%0o", character);
+      sprintf (the_rep + i, "\\%0o", (unsigned)character->wc);
       i = strlen (the_rep);
     }
-  else if (character == DEL)
+  else if (mb_iseq (*character, DEL))
     {
       the_rep[i++] = '^';
       the_rep[i++] = '?';
     }
   else
-    the_rep[i++] = character;
+    {
+      memcpy (the_rep, mb_ptr (*character), mb_len (*character));
+      i = mb_len (*character);
+    }
 
   the_rep[i] = 0;
 
Index: info/info-utils.h
===================================================================
RCS file: /sources/texinfo/texinfo/info/info-utils.h,v
retrieving revision 1.5
diff -u -r1.5 info-utils.h
--- info/info-utils.h   15 May 2005 00:00:07 -0000      1.5
+++ info/info-utils.h   30 Jan 2007 04:09:17 -0000
@@ -23,6 +23,8 @@
 #ifndef INFO_UTILS_H
 #define INFO_UTILS_H
 
+#include "info.h"
+#include "mbchar.h"
 #include "nodes.h"
 #include "window.h"
 #include "search.h"
@@ -99,7 +101,7 @@
 
 /* Return a pointer to a string which is the printed representation
    of CHARACTER if it were printed at HPOS. */
-extern char *printed_representation (unsigned char character, int hpos);
+extern char *printed_representation (const mbchar_t *character, int hpos);
 
 /* Return a pointer to the part of PATHNAME that simply defines the file. */
 extern char *filename_non_directory (char *pathname);
Index: info/info.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/info.c,v
retrieving revision 1.14
diff -u -r1.14 info.c
--- info/info.c 13 Aug 2006 23:36:22 -0000      1.14
+++ info/info.c 30 Jan 2007 04:09:17 -0000
@@ -154,10 +154,6 @@
 #ifdef HAVE_SETLOCALE
   /* Set locale via LC_ALL.  */
   setlocale (LC_ALL, "");
-  /* But don't use translated messages in the case when
-     string width and length can differ */
-  if (MB_CUR_MAX > 1)
-    setlocale(LC_MESSAGES, "C");
 #endif
 
 #ifdef ENABLE_NLS
Index: info/infodoc.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/infodoc.c,v
retrieving revision 1.10
diff -u -r1.10 infodoc.c
--- info/infodoc.c      15 Jan 2006 23:20:11 -0000      1.10
+++ info/infodoc.c      30 Jan 2007 04:09:17 -0000
@@ -21,6 +21,8 @@
    Written by Brian Fox (address@hidden). */
 
 #include "info.h"
+#include "mbchar.h"
+#include "mbswidth.h"
 #include "funs.h"
 
 /* HELP_NODE_GETS_REGENERATED is always defined now that keys may get
@@ -247,15 +249,18 @@
             printf_to_message_buffer ("(%s)", name, NULL, NULL);
             length_so_far = message_buffer_length_this_line ();
 
-            if ((desired_doc_start + strlen (doc))
+            if ((desired_doc_start + mbswidth (doc, 0))
                 >= (unsigned int) the_screen->width)
               printf_to_message_buffer ("\n     ", NULL, NULL, NULL);
             else
               {
                 while (length_so_far < desired_doc_start)
                   {
+                   mbchar_t mbc;
+
                     printf_to_message_buffer ("\t", NULL, NULL, NULL);
-                    length_so_far += character_width ('\t', length_so_far);
+                   mb_setascii (&mbc, '\t');
+                    length_so_far += character_width (&mbc, length_so_far);
                   }
               }
           }
Index: info/session.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/session.c,v
retrieving revision 1.17
diff -u -r1.17 session.c
--- info/session.c      15 May 2005 00:00:07 -0000      1.17
+++ info/session.c      30 Jan 2007 04:09:17 -0000
@@ -21,6 +21,8 @@
    Originally written by Brian Fox (address@hidden). */
 
 #include "info.h"
+#include "mbiter.h"
+#include "xalloc.h"
 #include "search.h"
 #include <sys/ioctl.h>
 
@@ -692,39 +694,58 @@
     info_backward_char (window, -count, key);
   else
     {
-      window->point += count;
+      mbi_iterator_t it;
 
-      if (window->point >= window->node->nodelen)
-        window->point = window->node->nodelen - 1;
+      for (mbi_init (it, window->node->contents + window->point,
+                    window->node->nodelen - window->point);
+          mbi_avail (it) && count != 0; mbi_advance (it), count--)
+       ;
+      window->point = mbi_cur_ptr (it) - window->node->contents;
 
-      info_show_point (window);
+      if (window->point >= window->node->nodelen)
+       info_end_of_node (window, 0, key);
+      else
+       info_show_point (window);
     }
 }
 
 /* Move point backward in the node. */
 DECLARE_INFO_COMMAND (info_backward_char, _("Move backward a character"))
 {
-  if (count < 0)
+  if (count <= 0)
     info_forward_char (window, -count, key);
   else
     {
-      window->point -= count;
-
-      if (window->point < 0)
+      const char **last_n;
+      mbi_iterator_t it;
+      size_t i;
+
+      last_n = XCALLOC (count, const char *);
+      i = 0;
+      /* last_n[i, (i + 1) % count, ..., count - 1] point to the previous
+        COUNT characters, or NULL if there is no such character. */
+      for (mbi_init (it, window->node->contents, window->point);
+          mbi_avail (it); mbi_advance (it))
+       {
+         last_n[i] = mbi_cur_ptr (it);
+         i = (i + 1) % count;
+       }
+      if (last_n[i] != NULL)
+       window->point = last_n[i] - window->node->contents;
+      else
         window->point = 0;
+      free (last_n);
 
       info_show_point (window);
     }
 }
 
-#define alphabetic(c) (islower (c) || isupper (c) || isdigit (c))
+#define alphabetic(c) (mb_isalpha (c) || mb_isdigit (c))
 
 /* Move forward a word in this node. */
 DECLARE_INFO_COMMAND (info_forward_word, _("Move forward a word"))
 {
-  long point;
-  char *buffer;
-  int end, c;
+  mbi_iterator_t it;
 
   if (count < 0)
     {
@@ -732,89 +753,94 @@
       return;
     }
 
-  point = window->point;
-  buffer = window->node->contents;
-  end = window->node->nodelen;
+  mbi_init (it, window->node->contents + window->point,
+           window->node->nodelen - window->point);
 
   while (count)
     {
-      if (point + 1 >= end)
-        return;
+      if (!mbi_avail (it))
+       break;
 
       /* If we are not in a word, move forward until we are in one.
          Then, move forward until we hit a non-alphabetic character. */
-      c = buffer[point];
-
-      if (!alphabetic (c))
+      if (!alphabetic (mbi_cur (it)))
         {
-          while (++point < end)
-            {
-              c = buffer[point];
-              if (alphabetic (c))
-                break;
-            }
+         do
+           mbi_advance (it);
+         while (mbi_avail (it) && !alphabetic (mbi_cur (it)));
         }
 
-      if (point >= end) return;
+      if (!mbi_avail (it))
+       break;
 
-      while (++point < end)
-        {
-          c = buffer[point];
-          if (!alphabetic (c))
-            break;
-        }
+      do
+       mbi_advance (it);
+      while (mbi_avail (it) && alphabetic (mbi_cur (it)));
       --count;
     }
-  window->point = point;
-  info_show_point (window);
+  if (!mbi_avail (it))
+    info_end_of_node (window, 0, key);
+  else
+    {
+      window->point = mbi_cur_ptr (it) - window->node->contents;
+      info_show_point (window);
+    }
 }
 
 DECLARE_INFO_COMMAND (info_backward_word, _("Move backward a word"))
 {
-  long point;
-  char *buffer;
-  int c;
+  mbi_iterator_t it;
+  const char **chars;
+  size_t i;
 
-  if (count < 0)
+  if (count <= 0)
     {
       info_forward_word (window, -count, key);
       return;
     }
 
-  buffer = window->node->contents;
-  point = window->point;
+  chars = XNMALLOC (window->point + 1, const char *);
+  i = 0;
+  for (mbi_init (it, window->node->contents, window->point); mbi_avail (it);
+       mbi_advance (it))
+    chars[i++] = mbi_cur_ptr (it);
+  chars[i] = mbi_cur_ptr (it);
 
   while (count)
     {
-      if (point == 0)
+      if (i == 0)
         break;
 
       /* Like info_forward_word (), except that we look at the
          characters just before point. */
 
-      c = buffer[point - 1];
+      mbi_init (it, chars[i - 1], chars[i] - chars[i - 1]);
+      mbi_avail (it);
 
-      if (!alphabetic (c))
+      if (!alphabetic (mbi_cur (it)))
         {
-          while (--point)
+          while (--i != 0)
             {
-              c = buffer[point - 1];
-              if (alphabetic (c))
+             mbi_init (it, chars[i - 1], chars[i] - chars[i - 1]);
+             mbi_avail (it);
+              if (alphabetic (mbi_cur (it)))
                 break;
             }
         }
 
-      while (point)
+      while (i != 0)
         {
-          c = buffer[point - 1];
-          if (!alphabetic (c))
+         mbi_init (it, chars[i - 1], chars[i] - chars[i - 1]);
+         mbi_avail (it);
+          if (!alphabetic (mbi_cur (it)))
             break;
           else
-            --point;
+            --i;
         }
       --count;
     }
-  window->point = point;
+  window->point = chars[i] - window->node->contents;
+  free (chars);
   info_show_point (window);
 }
 
@@ -1276,7 +1302,14 @@
 /* Move to the end of the node. */
 DECLARE_INFO_COMMAND (info_end_of_node, _("Move to the end of this node"))
 {
-  window->point = window->node->nodelen - 1;
+  mbi_iterator_t it;
+  const char *last;
+
+  last = window->node->contents;
+  for (mbi_init (it, window->node->contents, window->node->nodelen);
+       mbi_avail (it); mbi_advance (it))
+    last = mbi_cur_ptr (it);
+  window->point = last - window->node->contents;
   info_show_point (window);
 }
 
@@ -4968,7 +5001,9 @@
     }
 }
 
-/* How to read a single character. */
+/* How to read a single character.
+
+   FIXME: multibyte characters are not supported. */
 unsigned char
 info_get_input_char (void)
 {
Index: info/window.c
===================================================================
RCS file: /sources/texinfo/texinfo/info/window.c,v
retrieving revision 1.5
diff -u -r1.5 window.c
--- info/window.c       15 May 2005 00:00:07 -0000      1.5
+++ info/window.c       30 Jan 2007 04:09:17 -0000
@@ -21,6 +21,8 @@
    Written by Brian Fox (address@hidden). */
 
 #include "info.h"
+#include "mbiter.h"
+#include "mbuiter.h"
 #include "nodes.h"
 #include "window.h"
 #include "display.h"
@@ -702,7 +704,7 @@
 /* Return the number of characters it takes to display CHARACTER on the
    screen at HPOS. */
 int
-character_width (int character, int hpos)
+character_width (const mbchar_t *character, int hpos)
 {
   int printable_limit = 127;
   int width = 1;
@@ -710,11 +712,12 @@
   if (ISO_Latin_p)
     printable_limit = 255;
 
-  if (character > printable_limit)
-    width = 3;
-  else if (iscntrl (character))
+  if (mb_isprint (*character))
+    width = mb_width (*character);
+  else if (mb_iscntrl (*character)
+          && character->wc >= 0 && character->wc < 127)
     {
-      switch (character)
+      switch ((char)character->wc)
         {
         case '\r':
         case '\n':
@@ -727,8 +730,12 @@
           width = 2;
         }
     }
-  else if (character == DEL)
+  else if (!ISO_Latin_p && character->wc >= 0 && character->wc > 127)
+    width = 3;
+  else if (mb_iseq (*character, DEL))
     width = 2;
+  else
+    width = 0;
 
   return (width);
 }
@@ -738,24 +745,27 @@
 int
 string_width (char *string, int hpos)
 {
-  register int i, width, this_char_width;
+  register int width, this_char_width;
+  mbui_iterator_t i;
 
-  for (width = 0, i = 0; string[i]; i++)
+  width = 0;
+  for (mbui_init (i, string); mbui_avail (i); mbui_advance (i))
     {
       /* Support ANSI escape sequences for -R.  */
       if (raw_escapes_p
-         && string[i] == '\033'
-         && string[i+1] == '['
-         && isdigit (string[i+2])
-         && (string[i+3] == 'm'
-             || (isdigit (string[i+3]) && string[i+4] == 'm')))
+         && mb_iseq (mbui_cur (i), '\033')
+         && mbui_cur_ptr (i)[1] == '['
+         && isdigit (mbui_cur_ptr(i)[2])
+         && (mbui_cur_ptr (i)[3] == 'm'
+             || (isdigit (mbui_cur_ptr (i)[3])
+                 && mbui_cur_ptr (i)[4] == 'm')))
        {
-         while (string[i] != 'm')
-           i++;
+         while (mbui_avail (i) && !mb_iseq (mbui_cur (i), 'm'))
+           mbui_advance (i);
          this_char_width = 0;
        }
       else
-       this_char_width = character_width (string[i], hpos);
+       this_char_width = character_width (&mbui_cur (i), hpos);
       width += this_char_width;
       hpos += this_char_width;
     }
@@ -786,7 +796,8 @@
 void
 calculate_line_starts (WINDOW *window)
 {
-  register int i, hpos;
+  mbi_iterator_t i;
+  register int hpos;
   char **line_starts = NULL;
   int line_starts_index = 0, line_starts_slots = 0;
   int bump_index;
@@ -802,57 +813,45 @@
   /* Grovel the node starting at the top, and for each line calculate the
      width of the characters appearing in that line.  Add each line start
      to our array. */
-  i = 0;
+  mbi_init (i, node->contents, node->nodelen);
   hpos = 0;
   bump_index = 0;
 
-  while (i < node->nodelen)
+  while (mbi_avail (i))
     {
-      char *line = node->contents + i;
-      unsigned int cwidth, c;
+      char *line = (char *)mbi_cur_ptr (i);
+      unsigned int cwidth;
 
       add_pointer_to_array (line, line_starts_index, line_starts,
                             line_starts_slots, 100, char *);
       if (bump_index)
         {
-          i++;
+          mbi_advance (i);
           bump_index = 0;
         }
 
-      while (1)
+      while (mbi_avail (i))
         {
-         /* The cast to unsigned char is for 8-bit characters, which
-            could be passed as negative integers to character_width
-            and wreak havoc on some naive implementations of iscntrl.  */
-          c = (unsigned char) node->contents[i];
-
          /* Support ANSI escape sequences for -R.  */
          if (raw_escapes_p
-             && c == '\033'
-             && node->contents[i+1] == '['
-             && isdigit (node->contents[i+2]))
+             && mb_iseq (mbi_cur (i), '\033')
+             && mbi_cur_ptr (i)[1] == '['
+             && isdigit (mbi_cur_ptr (i)[2])
+             && (mbi_cur_ptr (i)[3] == 'm'
+                 || (isdigit (mbi_cur_ptr (i)[3])
+                     && mbi_cur_ptr (i)[4] == 'm')))
            {
-             if (node->contents[i+3] == 'm')
-               {
-                 i += 3;
-                 cwidth = 0;
-               }
-             else if (isdigit (node->contents[i+3])
-                      && node->contents[i+4] == 'm')
-               {
-                 i += 4;
-                 cwidth = 0;
-               }
-             else
-               cwidth = character_width (c, hpos);
+             while (mbi_avail (i) && !mb_iseq (mbi_cur (i), 'm'))
+               mbi_advance (i);
+             cwidth = 0;
            }
          else
-           cwidth = character_width (c, hpos);
+           cwidth = character_width (&mbi_cur (i), hpos);
 
           /* If this character fits within this line, just do the next one. */
           if ((hpos + cwidth) < (unsigned int) window->width)
             {
-              i++;
+              mbi_advance (i);
               hpos += cwidth;
               continue;
             }
@@ -860,15 +859,16 @@
             {
               /* If this character would position the cursor at the start of
                  the next printed screen line, then do the next line. */
-              if (c == '\n' || c == '\r' || c == '\t')
+              if (mb_iseq (mbi_cur (i), '\n') || mb_iseq (mbi_cur (i), '\r')
+                 || mb_iseq (mbi_cur (i), '\t'))
                 {
-                  i++;
+                 mbi_advance (i);
                   hpos = 0;
                   break;
                 }
               else
                 {
-                  /* This character passes the window width border.  Postion
+                  /* This character passes the window width border.  Position
                      the cursor after the printed character, but remember this
                      line start as where this character is.  A bit tricky. */
 
@@ -877,11 +877,11 @@
                   if (window->flags & W_NoWrap)
                     {
                       hpos = 0;
-                      while (i < node->nodelen && node->contents[i] != '\n')
-                        i++;
+                      while (mbi_avail (i) && !mb_iseq (mbi_cur (i), '\n'))
+                       mbi_advance (i);
 
-                      if (node->contents[i] == '\n')
-                        i++;
+                      if (mbi_avail (i) && mb_iseq (mbi_cur (i), '\n'))
+                        mbi_advance (i);
                     }
                   else
                     {
@@ -1009,6 +1009,7 @@
 int
 window_get_cursor_column (WINDOW *window)
 {
+  mbi_iterator_t it;
   int i, hpos, end;
   char *line;
 
@@ -1020,23 +1021,23 @@
   line = window->line_starts[i];
   end = window->point - (line - window->node->contents);
 
-  for (hpos = 0, i = 0; i < end; i++)
+  hpos = 0;
+  for (mbi_init (it, line, end); mbi_avail (it); mbi_advance (it))
     {
       /* Support ANSI escape sequences for -R.  */
       if (raw_escapes_p
-         && line[i] == '\033'
-         && line[i+1] == '['
-         && isdigit (line[i+2]))
+         && mb_iseq (mbi_cur (it), '\033')
+         && mbi_cur_ptr (it)[1] == '['
+         && isdigit (mbi_cur_ptr (it)[2])
+         && (mbi_cur_ptr (it)[3] == 'm'
+             || (isdigit (mbi_cur_ptr (it)[3])
+                 && mbi_cur_ptr (it)[4] == 'm')))
        {
-         if (line[i+3] == 'm')
-           i += 3;
-         else if (isdigit (line[i+3]) && line[i+4] == 'm')
-           i += 4;
-         else
-           hpos += character_width (line[i], hpos);
+         while (mbi_avail (it) && !mb_iseq (mbi_cur (it), 'm'))
+           mbi_advance (it);
        }
       else
-       hpos += character_width (line[i], hpos);
+       hpos += character_width (&mbi_cur (it), hpos);
     }
 
   return (hpos);
@@ -1047,28 +1048,34 @@
 int
 window_chars_to_goal (char *line, int goal)
 {
-  register int i, check = 0, hpos;
+  register int check = 0, hpos;
+  mbui_iterator_t i;
 
-  for (hpos = 0, i = 0; line[i] != '\n'; i++)
+  hpos = 0;
+  for (mbui_init (i, line); mbui_avail (i) && !mb_iseq (mbui_cur (i), '\n');
+       mbui_advance (i))
     {
       /* Support ANSI escape sequences for -R.  */
       if (raw_escapes_p
-         && line[i] == '\033'
-         && line[i+1] == '['
-         && isdigit (line[i+2])
-         && (line[i+3] == 'm'
-             || (isdigit (line[i+3]) && line[i+4] == 'm')))
-       while (line[i] != 'm')
-         i++;
+         && mb_iseq (mbui_cur (i), '\033')
+         && mbui_cur_ptr (i)[1] == '['
+         && isdigit (mbui_cur_ptr (i)[2])
+         && (mbui_cur_ptr (i)[3] == 'm'
+             || (isdigit (mbui_cur_ptr (i)[3])
+                 && mbui_cur_ptr (i)[4] == 'm')))
+       {
+         while (mbui_avail (i) && !mb_iseq (mbui_cur (i), 'm'))
+           mbui_advance (i);
+       }
       else
-       check = hpos + character_width (line[i], hpos);
-
-      if (check > goal)
-        break;
-
-      hpos = check;
+       {
+         check = hpos + character_width (&mbui_cur(i), hpos);
+         if (check > goal)
+           break;
+         hpos = check;
+       }
     }
-  return (i);
+  return mbui_cur_ptr (i) - line;
 }
 
 /* Create a modeline for WINDOW, and store it in window->modeline. */
Index: info/window.h
===================================================================
RCS file: /sources/texinfo/texinfo/info/window.h,v
retrieving revision 1.4
diff -u -r1.4 window.h
--- info/window.h       15 May 2005 00:00:07 -0000      1.4
+++ info/window.h       30 Jan 2007 04:09:17 -0000
@@ -25,6 +25,8 @@
 #ifndef INFO_WINDOW_H
 #define INFO_WINDOW_H
 
+#include "info.h"
+#include "mbchar.h"
 #include "infomap.h"
 #include "nodes.h"
 
@@ -215,7 +217,7 @@
 
 /* Return the number of characters it takes to display CHARACTER on the
    screen at HPOS. */
-extern int character_width (int character, int hpos);
+extern int character_width (const mbchar_t *character, int hpos);
 
 /* Return the number of characters it takes to display STRING on the
    screen at HPOS. */

reply via email to

[Prev in Thread] Current Thread [Next in Thread]