[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Refactoring xml.el namespace handling
From: |
Mark A. Hershberger |
Subject: |
Re: Refactoring xml.el namespace handling |
Date: |
Wed, 03 Mar 2004 00:35:42 -0600 |
User-agent: |
Gnus/5.110002 (No Gnus v0.2) Emacs/22.0.0 (gnu/linux) |
Stefan Monnier <address@hidden> writes:
> Could you send a patch relative to the CVS head version?
> Your patch does not apply cleanly to the current code.
Argh. My CVS skillz are lacking.
Now that I've gotten a good HEAD, here is a fresh diff.
2004-03-03 Mark A. Hershberger <address@hidden>
* xml.el (xml-maybe-do-ns): new function to handle namespace
parsing of both attribute and element names.
(xml-ns-parse-ns-attrs, xml-ns-expand-el, xml-ns-expand-attr,
xml-intern-attrlist): Removed in favor of xml-maybe-do-ns to avoid
un-necessary intern-ing.
(xml-parse-tag): Updated assumed namespaces. Cleaned up namespace
parsing.
(xml-parse-attlist): Now does its own namespace parsing work.
(xml-parse-dtd): Updated <!ELEMENT parsing to accept elements with
hyphens. Now skips ENTITY, ATTLIST, and NOTATION instead of
barfing.
--- xml.el 2 Mar 2004 21:45:06 -0000 1.30
+++ xml.el 3 Mar 2004 06:17:00 -0000
@@ -52,15 +52,15 @@
;;; LIST FORMAT
-;; The functions `xml-parse-file' and `xml-parse-tag' return a list with
-;; the following format:
+;; The functions `xml-parse-file', `xml-parse-region' and
+;; `xml-parse-tag' return a list with the following format:
;;
;; xml-list ::= (node node ...)
-;; node ::= (tag_name attribute-list . child_node_list)
+;; node ::= (qname attribute-list . child_node_list)
;; child_node_list ::= child_node child_node ...
;; child_node ::= node | string
-;; tag_name ::= string
-;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...)
+;; qname ::= (:namespace-uri . "name") | "name"
+;; attribute_list ::= ((qname . "value") (qname . "value") ...)
;; | nil
;; string ::= "..."
;;
@@ -68,6 +68,11 @@
;; Whitespace is preserved. Fixme: There should be a tree-walker that
;; can remove it.
+;; TODO:
+;; * xml:base, xml:space support
+;; * more complete DOCTYPE parsing
+;; * pi support
+
;;; Code:
;; Note that {buffer-substring,match-string}-no-properties were
@@ -230,72 +235,37 @@
(cons dtd (nreverse xml))
(nreverse xml)))))))
-(defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns)
- "Parse the namespace attributes and return a list of cons in the form:
-\(namespace . prefix)"
-
- (mapcar
- (lambda (attr)
- (let* ((splitup (split-string (car attr) ":"))
- (prefix (nth 0 splitup))
- (lname (nth 1 splitup)))
- (when (string= "xmlns" prefix)
- (push (cons (if lname
- lname
- "")
- (cdr attr))
- xml-ns)))) attr-list)
- xml-ns)
-
-;; expand element names
-(defun xml-ns-expand-el (el xml-ns)
- "Expand the XML elements from \"prefix:local-name\" to a cons in the form
-\"(namespace . local-name)\"."
-
- (let* ((splitup (split-string el ":"))
- (lname (or (nth 1 splitup)
- (nth 0 splitup)))
- (prefix (if (nth 1 splitup)
- (nth 0 splitup)
- (if (string= lname "xmlns")
- "xmlns"
- "")))
- (ns (cdr (assoc-string prefix xml-ns))))
- (if (string= "" ns)
- lname
- (cons (intern (concat ":" ns))
- lname))))
-
-;; expand attribute names
-(defun xml-ns-expand-attr (attr-list xml-ns)
- "Expand the attribute list for a particular element from the form
-\"prefix:local-name\" to the form \"{namespace}:local-name\"."
-
- (mapcar
- (lambda (attr)
- (let* ((splitup (split-string (car attr) ":"))
+(defun xml-maybe-do-ns (name default xml-ns)
+ "Perform any namespace expansion. NAME is the name to perform the expansion
on.
+DEFAULT is the default namespace. XML-NS is a cons of namespace
+names to uris. When namespace-aware parsing is off, then XML-NS
+is nil.
+
+During namespace-aware parsing, any name without a namespace is
+put into the namespace identified by DEFAULT. nil is used to
+specify that the name shouldn't be given a namespace."
+ (if (consp xml-ns)
+ (let* ((splitup (split-string name ":"))
(lname (or (nth 1 splitup)
(nth 0 splitup)))
(prefix (if (nth 1 splitup)
(nth 0 splitup)
- (if (string= (car attr) "xmlns")
- "xmlns"
- "")))
- (ns (cdr (assoc-string prefix xml-ns))))
- (setcar attr
- (if (string= "" ns)
- lname
- (cons (intern (concat ":" ns))
- lname)))))
- attr-list)
- attr-list)
-
-(defun xml-intern-attrlist (attr-list)
- "Convert attribute names to symbols for backward compatibility."
- (mapcar (lambda (attr)
- (setcar attr (intern (car attr))))
- attr-list)
- attr-list)
+ default))
+ (ns (progn
+ (if (and
+ (string-equal lname "xmlns")
+ (not prefix))
+ (cdr (assoc "xmlns" xml-ns))
+ (cdr (assoc prefix xml-ns))))))
+ (if ns
+ (cons ns
+ (if (and
+ (string-equal lname "xmlns")
+ (not prefix))
+ ""
+ lname))
+ lname))
+ (intern name)))
(defun xml-parse-tag (&optional parse-dtd parse-ns)
"Parse the tag at point.
@@ -308,12 +278,12 @@
- a pair : the first element is the DTD, the second is the node."
(let ((xml-ns (if (consp parse-ns)
parse-ns
- (if parse-ns
+ (when parse-ns
(list
- ;; Default no namespace
- (cons "" "")
+ ;; "xml" namespace
+ (cons "xml" :http://www.w3.org/XML/1998/namespace)
;; We need to seed the xmlns namespace
- (cons "xmlns" "http://www.w3.org/2000/xmlns/"))))))
+ (cons "xmlns" :http://www.w3.org/2000/xmlns/))))))
(cond
;; Processing instructions (like the <?xml version="1.0"?> tag at the
;; beginning of a document).
@@ -350,19 +320,25 @@
;; Parse this node
(let* ((node-name (match-string 1))
- (attr-list (xml-parse-attlist))
- (children (if (consp xml-ns) ;; take care of namespace parsing
- (progn
- (setq xml-ns (xml-ns-parse-ns-attrs
- attr-list xml-ns))
- (list (xml-ns-expand-attr
- attr-list xml-ns)
- (xml-ns-expand-el
- node-name xml-ns)))
- (list (xml-intern-attrlist attr-list)
- (intern node-name))))
+ ;; Parse the attribute list.
+ (children (list (xml-parse-attlist xml-ns) node-name))
pos)
+ ;; add the xmlns:* attrs to our cache
+ (when (consp xml-ns)
+ (mapcar
+ (lambda (attr)
+ (when (and (listp (car attr))
+ (eq :http://www.w3.org/2000/xmlns/
+ (caar attr)))
+ (setq xml-ns (append (list (cons (cdar attr)
+ (intern (concat ":" (cdr
attr)))))
+ xml-ns))))
+ (car children)))
+
+ ;; expand element names
+ (setcdr children (list (xml-maybe-do-ns (cadr children) "" xml-ns)))
+
;; is this an empty element ?
(if (looking-at "/>")
(progn
@@ -416,7 +392,7 @@
(t ;; This is not a tag.
(error "XML: Invalid character")))))
-(defun xml-parse-attlist ()
+(defun xml-parse-attlist (&optional xml-ns)
"Return the attribute-list after point. Leave point at the
first non-blank character after the tag."
(let ((attlist ())
@@ -424,8 +400,9 @@
(skip-syntax-forward " ")
(while (looking-at (eval-when-compile
(concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
- (setq name (match-string 1))
- (goto-char (match-end 0))
+ (setq end-pos (match-end 0))
+ (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
+ (goto-char end-pos)
;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
@@ -527,7 +504,7 @@
;; Translation of rule [45] of XML specifications
((looking-at
- "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
+ "<!ELEMENT\\s-+\\([[:alnum:].%;-]+\\)\\s-+\\([^>]+\\)>")
(setq element (match-string 1)
type (match-string-no-properties 2))
@@ -556,7 +533,15 @@
(goto-char end-pos))
((looking-at "<!--")
(search-forward "-->"))
-
+ ((looking-at "<!ENTITY\\s-+\\([[:alnum:].%;-]+\\)\\s-+\\([^>]+\\)>")
+ ; Put the ENTITY in
+ (goto-char (match-end 0)))
+ ((looking-at "<!ATTLIST\\s+\\([[:alnum:].%;-]+\\)\\s+\\([^>]+\\)>")
+ ; Put in the ATTLIST
+ (goto-char (match-end 0)))
+ ((looking-at "<!NOTATION\\s+\\([[:alnum:].%;-]+\\)\\s+\\([^>]+\\)>")
+ ; Put in the NOTATION
+ (goto-char (match-end 0)))
(t
(error "XML: Invalid DTD item")))
--
A choice between one man and a shovel, or a dozen men with teaspoons
is clear to me, and I'm sure it is clear to you also.
-- Zimran Ahmed <http://www.winterspeak.com/>