[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: emacs-w3m display html patch
From: |
Henrik S. Hansen |
Subject: |
Re: emacs-w3m display html patch |
Date: |
Tue, 19 Oct 2004 01:37:11 +0200 |
User-agent: |
Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux) |
This is version 2 of the display html patch. It
now also accounts for some non-standard HTML entities, and makes
w3m-filter-asahi-shimbun call w3m-ucs-to-string instead of
w3m-ucs-to-char. I hope it's useful. Comments are very welcome.
The display_html patch, version 2:
Author: Henrik S. Hansen <address@hidden>
License: GNU General Public License, ver. 2 or newer
Date: 2004-10-19
For emacs-w3m-version: CVS 1.1070
diff -Naur emacs-w3m.orig/w3m-filter.el emacs-w3m/w3m-filter.el
--- emacs-w3m.orig/w3m-filter.el 2004-10-16 14:59:07.000000000 +0200
+++ emacs-w3m/w3m-filter.el 2004-10-19 01:21:46.000000000 +0200
@@ -95,6 +95,6 @@
(while (re-search-forward "&#\\([0-9]+\\);" (max end (point)) t)
(setq ucs (string-to-number (match-string 1)))
(delete-region (match-beginning 0) (match-end 0))
- (insert-char (w3m-ucs-to-char ucs) 1))))))
+ (insert-string (w3m-ucs-to-string ucs)))))))
;;; w3m-filter.el ends here
diff -Naur emacs-w3m.orig/w3m.el emacs-w3m/w3m.el
--- emacs-w3m.orig/w3m.el 2004-10-16 14:59:07.000000000 +0200
+++ emacs-w3m/w3m.el 2004-10-19 00:47:05.000000000 +0200
@@ -2745,14 +2745,63 @@
(cdr elem))))
(eval-and-compile
- (unless (fboundp 'w3m-ucs-to-char)
- (defun w3m-ucs-to-char (codepoint)
- "A dummy function defined since nothing provided the regular function.
-This function simply returns the arg if it is between 0x20 and 0x7e,
-otherwise returns the tilde character."
+ (unless (fboundp 'w3m-ucs-to-string)
+ (defun w3m-ucs-to-string (codepoint)
+ "A dummy function defined since nothing provided the regular
+function. This function returns the string representation of
+CODEPOINT, if it is between 0x20 and 0x7e. Otherwise, we try to map
+CODEPOINT to a string resembling the HTML character entity, and return
+this. If this is not possible, a string with only the tilde character
+is returned."
(if (or (< codepoint 32) (< 127 codepoint))
- ?~ ;; unsupported character
- codepoint))))
+ (cond
+ ;; Non-standard entities
+ ((= codepoint 130) "'") ;; low left single quote
+ ((= codepoint 132) "\"") ;; low left double quote
+ ((= codepoint 133) "...") ;; ellipsis
+ ((= codepoint 134) "^") ;; dagger
+ ((= codepoint 135) "^") ;; double dagger
+ ((= codepoint 136) "^") ;; circumflex
+ ((= codepoint 137) "%.") ;; per mille (thousand)
+ ((= codepoint 139) "<") ;; less-than
+ ((= codepoint 145) "`") ;; left single curly quote
+ ((= codepoint 146) "'") ;; right single curly quote
+ ((= codepoint 147) "\"") ;; left double curly quote
+ ((= codepoint 148) "\"") ;; right double curly quote
+ ((= codepoint 149) "*") ;; bullet
+ ((= codepoint 150) "-") ;; en dash
+ ((= codepoint 151) "--") ;; em dash
+ ((= codepoint 152) "~") ;; tilde
+ ((= codepoint 153) "(tm)") ;; trademark
+ ((= codepoint 155) ">") ;; greater-than
+ ;; Standard (extended) entities
+ ((= codepoint 8211) "-") ;; en dash
+ ((= codepoint 8212) "--") ;; em dash
+ ((= codepoint 8216) "`") ;; left single quote
+ ((= codepoint 8217) "'") ;; right single quote
+ ((= codepoint 8218) "'") ;; single low quote
+ ((= codepoint 8220) "\"") ;; left double quote
+ ((= codepoint 8221) "\"") ;; right double quote
+ ((= codepoint 8222) "\"") ;; double low quote
+ ((= codepoint 8224) "^") ;; dagger
+ ((= codepoint 8225) "^") ;; double dagger
+ ((= codepoint 8226) "*") ;; bullet
+ ((= codepoint 8230) "...") ;; ellipsis
+ ((= codepoint 8240) "%.") ;; per mille (thousand)
+ ((= codepoint 8242) "'") ;; prime, minutes, feet
+ ((= codepoint 8243) "\"") ;; double prime, seconds, inches
+ ((= codepoint 8260) "/") ;; fraction slash
+ ((= codepoint 8482) "(tm)") ;; trademark
+ ((= codepoint 8592) "<-") ;; left arrow
+ ((= codepoint 8593) "^") ;; up arrow
+ ((= codepoint 8594) "->") ;; right arrow
+ ((= codepoint 8595) "v") ;; down arrow
+ ((= codepoint 8596) "<->") ;; left-right arrow
+ ((= codepoint 8656) "<=") ;; left double arrow
+ ((= codepoint 8658) "=>") ;; right double arrow
+ ((= codepoint 8660) "<=>") ;; left-right double arrow
+ (t (char-to-string ?~)))
+ (char-to-string codepoint)))))
(defun w3m-entity-value (name strict)
"Get a char corresponding to NAME from the html char entities database.
@@ -2773,7 +2822,7 @@
(let ((codepoint (if (char-equal (string-to-char name) ?x)
(string-to-number (substring name 1) 16)
(string-to-number name))))
- (char-to-string (w3m-ucs-to-char codepoint))))
+ (w3m-ucs-to-string codepoint)))
(let ((val (intern-soft name w3m-entity-db))
(pre name)
(post ""))
--
Henrik S. Hansen