bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#13399: 24.3.50; Word-wrap can't wrap at zero-width space U-200B


From: Lars Ingebrigtsen
Subject: bug#13399: 24.3.50; Word-wrap can't wrap at zero-width space U-200B
Date: Fri, 18 Sep 2020 16:55:40 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/28.0.50 (gnu/linux)

Adam Tack <adam.tack.513@gmail.com> writes:

> I've split out the non-nil char-table case out into a function, as I
> think that using a named function slightly improves readability, and
> having a macro over 20 lines long, somehow feels "wrong".  If the
> compiler does actually follow the inline directive, there should be no
> additional performance hit.

This was the last post in the thread, and the patch no longer applied,
so I've respun it for Emacs 28.

However, I can't find any copyright assignment on file -- Adam, did you
go through with the assignment process?

diff --git a/doc/emacs/display.texi b/doc/emacs/display.texi
index e7b8745a04..9fcca8c6e6 100644
--- a/doc/emacs/display.texi
+++ b/doc/emacs/display.texi
@@ -1831,6 +1831,14 @@ Visual Line Mode
 report.  You can add categories to a character using the command
 @code{modify-category-entry}.
 
+@vindex word-wrap-chars
+@findex word-wrap-chars-mode
+  Word boundaries and hence points at which word wrap can occur are,
+by default, considered to occur on the space and tab characters.  If
+you prefer word-wrap to be permissible at other characters, you can
+change the value of the char-table @code{word-wrap-chars}, or use
+@code{word-wrap-chars-mode}, which does this for you.
+
 @node Display Custom
 @section Customization of Display
 
diff --git a/etc/NEWS b/etc/NEWS
index 54bad068f8..f3216ed445 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -73,6 +73,19 @@ its implementation has been removed from the Linux kernel.
 OpenBSD 5.3 and older releases are no longer supported, as they lack
 proper pty support that Emacs needs.
 
++++
+** The characters at which word-wrapping occurs can now be controlled
+using the new `word-wrap-chars' char-table.  If `word-wrap-chars' is
+nil (the default), then word-wrapping will occur only on the space or
+tab characters, as has been the case until now.
+
+The most convenient way to change the characters at which wrap occurs
+is customizing the new variable `word-wrap-type' and using the new
+`word-wrap-chars-mode' minor mode, which sets `word-wrap-chars' based
+on `word-wrap-type', for you.  The options for `word-wrap-type' are
+ascii-whitespace, unicode-whitespace and a customizable list of
+character codes and character code ranges.
+

 * Startup Changes in Emacs 28.1
 
diff --git a/lisp/simple.el b/lisp/simple.el
index 7dc695848b..b881cbc23e 100644
--- a/lisp/simple.el
+++ b/lisp/simple.el
@@ -7269,6 +7269,117 @@ turn-on-visual-line-mode
 (define-globalized-minor-mode global-visual-line-mode
   visual-line-mode turn-on-visual-line-mode)
 
+
+(defvar word-wrap-type)
+
+(defvar word-wrap-chars--saved nil)
+
+(define-minor-mode word-wrap-chars-mode
+  "Toggle wrapping using a look-up to `word-wrap-chars'.
+The exact choice of characters on which wrapping occurs, depends
+on the value of `word-wrap-type'.  By default, `word-wrap-type'
+is set to unicode-white-space, which allows word wrapping on all
+breakable unicode whitespace, not only space and tap.
+
+For details of other customization options, see
+`word-wrap-type'.
+
+This minor mode has no effect unless `visual-line-mode' is
+enabled or `word-wrap' is set to t.
+
+To toggle wrapping using a look-up, globally, use
+`global-word-wrap-chars-mode'."
+  :group 'visual-line
+  :lighter " wwc"
+  (if word-wrap-chars-mode
+      (progn
+        (if (local-variable-p 'word-wrap-chars)
+            (setq-local word-wrap-chars--saved
+                        word-wrap-chars))
+        (set-word-wrap-chars))
+    (setq-local word-wrap-chars word-wrap-chars--saved)))
+
+(defun turn-on-word-wrap-chars-mode ()
+  (visual-line-mode 1))
+
+(define-globalized-minor-mode global-word-wrap-chars-mode
+  word-wrap-chars-mode turn-on-word-wrap-chars-mode)
+
+(defun update-word-wrap-chars ()
+  "Update `word-wrap-chars' upon Customize of `word-wrap-type'.
+
+Only buffers which use the `word-wrap-chars-mode' are affected."
+  (mapcar #'(lambda (buf)
+             (with-current-buffer buf
+               (if word-wrap-chars-mode
+                    (set-word-wrap-chars))))
+         (buffer-list)))
+
+(defun set-word-wrap-chars ()
+  "Set `word-wrap-chars' locally, based on `word-wrap-type'."
+  (cond
+   ((eq word-wrap-type 'ascii-whitespace)
+    (setq-local word-wrap-chars nil))
+   ((eq word-wrap-type 'unicode-whitespace)
+    (set-word-wrap-chars-from-list
+     '(9 32 5760 (8192 . 8198) (8200 . 8203) 8287 12288)))
+   ((listp word-wrap-type)
+    (set-word-wrap-chars-from-list word-wrap-type))))
+
+(defun set-word-wrap-chars-from-list (list)
+  "Set `word-wrap-chars' locally from a list.
+Each element of the list can be a character code (code point) or
+a cons of character codes, representing the two (inclusive)
+endpoints of the range of characters."
+  (setq-local
+   word-wrap-chars
+   (let ((char-table (make-char-table nil nil)))
+     (dolist (range list char-table)
+       (set-char-table-range char-table range t)))))
+
+(defcustom word-wrap-type
+  'unicode-whitespace
+  "Characters on which word-wrap occurs.
+This variable controls the value of `word-wrap-chars' that is set
+by `word-wrap-chars-mode`.  `word-wrap-chars' determines on what
+characters word-wrapping can occur, when `word-wrap' is t or
+`visual-line-mode' is enabled.
+
+Possible values are ascii-whitespace, unicode-whitespace or a
+custom list of characters and character ranges.
+
+If the value is `ascii-whitespace', word-wrap is only on space
+and tab.  If the value is `unicode-whitespace', word-wrap is on
+all the Unicode whitespace characters that permit wrapping,
+including but not limited to space and tab.
+
+If a custom list of characters and ranges is used, word wrap is
+on these characters and character ranges.  The ranges are
+inclusive of both endpoints.
+
+When you change this without using customize, you need to call
+`update-word-wrap-chars' to update the word wrap in current
+buffers.  For instance:
+
+(setq word-wrap-type \\='(9 32 ?_))
+(update-word-wrap-chars)
+
+will set the wrappable characters to space, tab and underscore,
+in all buffers in `word-wrap-chars-mode' and using the default
+value of `word-wrap-type'.
+"
+  :type '(choice (const :tag "Space and tab" ascii-whitespace)
+                (const :tag "All unicode spaces" unicode-whitespace)
+                (repeat :tag "Custom characters or ranges"
+                        :value (9 32)
+                        (choice (character)
+                                (cons :tag "Range" character character))))
+  :set (lambda (symbol value)
+        (set-default symbol value)
+        (update-word-wrap-chars))
+  :group 'visual-line
+  :version 27.1)
+

 (defun transpose-chars (arg)
   "Interchange characters around point, moving forward one character.
diff --git a/src/buffer.c b/src/buffer.c
index 241f2d43a9..5c26323d69 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -5786,7 +5786,12 @@ syms_of_buffer (void)
 Visual Line mode.  Visual Line mode, when enabled, sets `word-wrap'
 to t, and additionally redefines simple editing commands to act on
 visual lines rather than logical lines.  See the documentation of
-`visual-line-mode'.  */);
+`visual-line-mode'.
+
+If `word-wrap-chars' is non-nil and a char-table, continuation lines
+are wrapped on the characters in `word-wrap-chars' whose value is t,
+rather than the space and tab characters.  `word-wrap-chars-mode
+provides a convenient interface for using this.  */);
 
   DEFVAR_PER_BUFFER ("default-directory", &BVAR (current_buffer, directory),
                     Qstringp,
diff --git a/src/character.c b/src/character.c
index 5860f6a0c8..032f4fc12b 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1084,4 +1084,14 @@ syms_of_character (void)
 See The Unicode Standard for the meaning of those values.  */);
   /* The correct char-table is setup in characters.el.  */
   Vunicode_category_table = Qnil;
+
+  DEFVAR_LISP ("word-wrap-chars", Vword_wrap_chars,
+              doc: /* A char-table for characters at which word-wrap occurs.
+Such characters have value t in this table.  If the char-table is nil,
+word-wrap occurs only on space and tab.
+
+For a more user-friendly way of changing the characters at which
+word-wrap can occur, consider using `word-wrap-chars-mode' and
+customizing `word-wrap-type'. */);
+  Vword_wrap_chars = Qnil;
 }
diff --git a/src/xdisp.c b/src/xdisp.c
index 615f0ca7cf..744b9a52c7 100644
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -494,20 +494,42 @@ #define IT_OVERFLOW_NEWLINE_INTO_FRINGE(it) false
 #endif /* HAVE_WINDOW_SYSTEM */
 
 /* Test if the display element loaded in IT, or the underlying buffer
-   or string character, is a space or a TAB character.  This is used
-   to determine where word wrapping can occur.  */
+   or string character, is a space or tab (by default, to avoid the
+   unnecessary performance hit of char-table lookup).  If
+   word-wrap-chars is a char-table, then instead check if the relevant
+   element or character belongs to the char-table.  This is used to
+   determine where word wrapping can occur.  */
 
 #define IT_DISPLAYING_WHITESPACE(it)                                   \
-  ((it->what == IT_CHARACTER && (it->c == ' ' || it->c == '\t'))       \
-   || ((STRINGP (it->string)                                           \
-       && (SREF (it->string, IT_STRING_BYTEPOS (*it)) == ' '           \
-           || SREF (it->string, IT_STRING_BYTEPOS (*it)) == '\t'))     \
-       || (it->s                                                       \
-          && (it->s[IT_BYTEPOS (*it)] == ' '                           \
-              || it->s[IT_BYTEPOS (*it)] == '\t'))                     \
-       || (IT_BYTEPOS (*it) < ZV_BYTE                                  \
-          && (*BYTE_POS_ADDR (IT_BYTEPOS (*it)) == ' '                 \
-              || *BYTE_POS_ADDR (IT_BYTEPOS (*it)) == '\t'))))
+  (!CHAR_TABLE_P (Vword_wrap_chars)                                    \
+   ? ((it->what == IT_CHARACTER && (it->c == ' ' || it->c == '\t'))    \
+      || ((STRINGP (it->string)                                                
\
+          && (SREF (it->string, IT_STRING_BYTEPOS (*it)) == ' '        \
+              || SREF (it->string, IT_STRING_BYTEPOS (*it)) == '\t'))  \
+         || (it->s                                                     \
+             && (it->s[IT_BYTEPOS (*it)] == ' '                        \
+                 || it->s[IT_BYTEPOS (*it)] == '\t'))                  \
+         || (IT_BYTEPOS (*it) < ZV_BYTE                                \
+             && (*BYTE_POS_ADDR (IT_BYTEPOS (*it)) == ' '              \
+                 || *BYTE_POS_ADDR (IT_BYTEPOS (*it)) == '\t'))))      \
+   : it_displaying_word_wrap_char(it))                                 \
+
+static inline bool
+char_is_word_wrap_char_p (int c) {
+  return !NILP (CHAR_TABLE_REF (Vword_wrap_chars, c));
+}
+
+static inline bool
+it_displaying_word_wrap_char (struct it *it) {
+  return ((it->what == IT_CHARACTER && char_is_word_wrap_char_p (it->c))
+         || (STRINGP (it->string) && char_is_word_wrap_char_p
+             (STRING_CHAR
+              (SDATA (it->string) + IT_STRING_BYTEPOS (*it))))
+         || (it->s && char_is_word_wrap_char_p
+             (STRING_CHAR(it->s + IT_BYTEPOS (*it))))
+         || (IT_BYTEPOS (*it) < ZV_BYTE && char_is_word_wrap_char_p
+             (FETCH_CHAR (IT_BYTEPOS (*it)))));
+}
 
 /* These are the category sets we use.  They are defined by
    kinsoku.el and chracters.el.  */
diff --git a/test/manual/word-wrap-test.el b/test/manual/word-wrap-test.el
new file mode 100644
index 0000000000..593c2decc7
--- /dev/null
+++ b/test/manual/word-wrap-test.el
@@ -0,0 +1,127 @@
+;;; word-wrap-test.el -- tests for word-wrap -*- lexical-binding: t -*-
+
+;; Copyright (C) 2017 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; Run the tests M-x word-wrap-test-[1-4] which correspond to the four
+;; combinations:
+;;
+;; i)  whitespace-mode being enabled and disabled,
+;;
+;; ii) word-wrap-chars being nil and equal to a char-table that
+;; specifies U-200B as the only word-wrap character.
+;;
+;; The tests with whitespace-mode are needed to help avoid a
+;; regression on Bug#11341.
+
+;;; Code:
+
+(setq whitespace-display-mappings-for-zero-width-space
+      '((space-mark 32
+                    [183]
+                    [46])
+        (space-mark 160
+                    [164]
+                    [95])
+        (space-mark 8203
+                    [164]
+                    [95])
+        (newline-mark 10
+                      [36 10])
+        (tab-mark 9
+                  [187 9]
+                  [92 9])))
+
+(defun word-wrap-test-1 ()
+  "Check word-wrap for nil `word-wrap-chars'."
+  (interactive)
+  (let ((buf (get-buffer-create "*Word-wrap Test 1*")))
+    (with-current-buffer buf
+      (erase-buffer)
+      (insert "Word wrap should occur for space.\n\n")
+      (dotimes (i 100)
+        (insert "1234567 ")) ; Space
+      (insert "\n\nWord wrap should NOT occur for U-200B.\n\n")
+      (dotimes (i 100)
+        (insert "1234567​")) ; U-200B
+      (setq word-wrap t)
+      (setq-local word-wrap-chars nil)
+      (whitespace-mode -1)
+      (display-buffer buf))))
+
+(defun word-wrap-test-2 ()
+  "Check word-wrap for nil `word-wrap-chars' with whitespace-mode."
+  (interactive)
+  (let ((buf (get-buffer-create "*Word-wrap Test 2*")))
+    (with-current-buffer buf
+      (erase-buffer)
+      (insert "Word wrap should occur for space (displayed as `·').\n\n")
+      (dotimes (i 100)
+        (insert "1234567 ")) ; Space
+      (insert "\n\nWord wrap should NOT occur for U-200B (displayed as 
`¤').\n\n")
+      (dotimes (i 100)
+        (insert "1234567​")) ; U-200B
+      (setq word-wrap t)
+      (setq-local word-wrap-chars nil)
+      (setq-local whitespace-display-mappings
+                  whitespace-display-mappings-for-zero-width-space)
+      (whitespace-mode)
+      (display-buffer buf))))
+
+(defun word-wrap-test-3 ()
+  "Check word-wrap if `word-wrap-chars' is a char-table."
+  (interactive)
+  (let ((buf (get-buffer-create "*Word-wrap Test 3*")))
+    (with-current-buffer buf
+      (erase-buffer)
+      (insert "Word wrap should NOT occur for space.\n\n")
+      (dotimes (i 100)
+        (insert "1234567 ")) ; Space
+      (insert "\n\nWord wrap should occur for U-200B.\n\n")
+      (dotimes (i 100)
+        (insert "1234567​")) ; U-200B
+      (setq word-wrap t)
+      (setq-local word-wrap-chars
+                  (let ((ct (make-char-table nil nil)))
+                    (set-char-table-range ct 8203 t)
+                    ct))
+      (whitespace-mode -1)
+      (display-buffer buf))))
+
+(defun word-wrap-test-4 ()
+  "Check word-wrap if `word-wrap-chars' is a char-table, for whitespace-mode."
+  (interactive)
+  (let ((buf (get-buffer-create "*Word-wrap Test 4*")))
+    (with-current-buffer buf
+      (erase-buffer)
+      (insert "Word wrap should NOT occur for space (displayed as `·').\n\n")
+      (dotimes (i 100)
+        (insert "1234567 ")) ; Space
+      (insert "\n\nWord wrap should occur for U-200B (displayed as `¤').\n\n")
+      (dotimes (i 100)
+        (insert "1234567​")) ; U-200B
+      (setq word-wrap t)
+      (setq-local word-wrap-chars
+                  (let ((ct (make-char-table nil nil)))
+                    (set-char-table-range ct 8203 t)
+                    ct))
+      (setq-local whitespace-display-mappings
+                  whitespace-display-mappings-for-zero-width-space)
+      (whitespace-mode)
+      (display-buffer buf))))


-- 
(domestic pets only, the antidote for overdose, milk.)
   bloggy blog: http://lars.ingebrigtsen.no





reply via email to

[Prev in Thread] Current Thread [Next in Thread]