From dlichteblau at common-lisp.net Wed Oct 3 15:17:08 2007 From: dlichteblau at common-lisp.net (dlichteblau) Date: Wed, 3 Oct 2007 11:17:08 -0400 (EDT) Subject: [cxml-cvs] CVS cxml/dom Message-ID: <20071003151708.C31AB3706A@common-lisp.net> Update of /project/cxml/cvsroot/cxml/dom In directory clnet:/tmp/cvs-serv23055 Modified Files: dom-builder.lisp dom-impl.lisp Log Message: Grow a buffer for string normalization exponentially. * dom/dom-builder.lisp (DOM-BUILDER): New slot `text-buffer'. (SAX:START-ELEMENT, SAX:END-ELEMENT, SAX:START-CDATA, SAX:END-CDATA, SAX:PROCESSING-INSTRUCTION, SAX:COMMENT): Call flush-characters. (SAX:CHARACTERS): Rewritten. (FLUSH-CHARACTERS): New, based on the old sax:characters. * dom/dom-impl.lisp ((initialize-instance :after entity-reference)): Call flush-characters. --- /project/cxml/cvsroot/cxml/dom/dom-builder.lisp 2007/07/22 19:59:26 1.13 +++ /project/cxml/cvsroot/cxml/dom/dom-builder.lisp 2007/10/03 15:17:08 1.14 @@ -18,7 +18,8 @@ (defclass dom-builder () ((document :initform nil :accessor document) (element-stack :initform '() :accessor element-stack) - (internal-subset :accessor internal-subset))) + (internal-subset :accessor internal-subset) + (text-buffer :initform nil :accessor text-buffer))) (defun make-dom-builder () (make-instance 'dom-builder)) @@ -87,6 +88,7 @@ (defmethod sax:start-element ((handler dom-builder) namespace-uri local-name qname attributes) (check-type qname rod) ;catch recoder/builder mismatch + (flush-characters handler) (with-slots (document element-stack) handler (let* ((nsp sax:*namespace-processing*) (element (make-instance 'element @@ -126,27 +128,45 @@ (defmethod sax:end-element ((handler dom-builder) namespace-uri local-name qname) (declare (ignore namespace-uri local-name qname)) + (flush-characters handler) (pop (element-stack handler))) (defmethod sax:characters ((handler dom-builder) data) - (with-slots (document element-stack) handler - (let* ((parent (car element-stack)) - (last-child (dom:last-child parent))) - (cond - ((eq (dom:node-type parent) :cdata-section) - (setf (dom:data parent) data)) - ((and last-child (eq (dom:node-type last-child) :text)) - ;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer - ;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten - ;; erweitern, sonst ist das Dokument nicht normalisiert. - ;; (XXX Oder sollte man besser den Parser entsprechend aendern?) - (dom:append-data last-child data)) - (t - (let ((node (dom:create-text-node document data))) - (setf (slot-value node 'parent) parent) - (fast-push node (slot-value (car element-stack) 'children)))))))) + (with-slots (text-buffer) handler + (cond + ((null text-buffer) + (setf text-buffer data)) + (t + (unless (array-has-fill-pointer-p text-buffer) + (setf text-buffer (make-array (length text-buffer) + :element-type 'rune + :adjustable t + :fill-pointer t + :initial-contents text-buffer))) + (let ((n (length text-buffer)) + (m (length data))) + (adjust-vector-exponentially text-buffer (+ n m) t) + (move data text-buffer 0 n m)))))) + +(defun flush-characters (handler) + (with-slots (document element-stack text-buffer) handler + (let ((data text-buffer)) + (when data + (when (array-has-fill-pointer-p data) + (setf data + (make-array (length data) + :element-type 'rune + :initial-contents data))) + (let ((parent (car element-stack))) + (if (eq (dom:node-type parent) :cdata-section) + (setf (dom:data parent) data) + (let ((node (dom:create-text-node document data))) + (setf (slot-value node 'parent) parent) + (fast-push node (slot-value (car element-stack) 'children))))) + (setf text-buffer nil))))) (defmethod sax:start-cdata ((handler dom-builder)) + (flush-characters handler) (with-slots (document element-stack) handler (let ((node (dom:create-cdata-section document #"")) (parent (car element-stack))) @@ -155,10 +175,12 @@ (push node element-stack)))) (defmethod sax:end-cdata ((handler dom-builder)) + (flush-characters handler) (let ((node (pop (slot-value handler 'element-stack)))) (assert (eq (dom:node-type node) :cdata-section)))) (defmethod sax:processing-instruction ((handler dom-builder) target data) + (flush-characters handler) (with-slots (document element-stack) handler (let ((node (dom:create-processing-instruction document target data)) (parent (car element-stack))) @@ -166,6 +188,7 @@ (fast-push node (slot-value (car element-stack) 'children))))) (defmethod sax:comment ((handler dom-builder) data) + (flush-characters handler) (with-slots (document element-stack) handler (let ((node (dom:create-comment document data)) (parent (car element-stack))) --- /project/cxml/cvsroot/cxml/dom/dom-impl.lisp 2006/09/10 14:52:44 1.42 +++ /project/cxml/cvsroot/cxml/dom/dom-impl.lisp 2007/10/03 15:17:08 1.43 @@ -1247,7 +1247,8 @@ (push instance (element-stack handler)) #+cxml-system::utf8dom-file (setf handler (cxml:make-recoder handler #'cxml:rod-to-utf8-string)) - (funcall resolver (real-rod (dom:name instance)) handler))) + (funcall resolver (real-rod (dom:name instance)) handler) + (flush-characters handler))) (labels ((walk (n) (setf (slot-value n 'read-only-p) t) (when (dom:element-p n) From dlichteblau at common-lisp.net Wed Oct 3 15:21:57 2007 From: dlichteblau at common-lisp.net (dlichteblau) Date: Wed, 3 Oct 2007 11:21:57 -0400 (EDT) Subject: [cxml-cvs] CVS cxml/xml Message-ID: <20071003152157.3FB9B3706A@common-lisp.net> Update of /project/cxml/cvsroot/cxml/xml In directory clnet:/tmp/cvs-serv25157 Modified Files: xml-parse.lisp Log Message: Fixed time and space usage in cases where entity references follow each other (thanks to Ivan Shvedunov for the report). * xml/xml-parse.lisp (P/CONTENT): Removed useless call to append. Use loop instead of tail recursion. --- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/08/05 11:15:48 1.72 +++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/10/03 15:21:56 1.73 @@ -2800,45 +2800,39 @@ (defun p/content (input) ;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* - (multiple-value-bind (cat sem) (peek-token input) - (case cat - ((:stag :ztag) - (p/element input) - (p/content input)) - ((:CDATA) - (process-characters input sem) - (sax:characters (handler *ctx*) sem) - (p/content input)) - ((:ENTITY-REF) - (let ((name sem)) - (consume-token input) - (append - (recurse-on-entity input name :general - (lambda (input) - (prog1 - (etypecase (checked-get-entdef name :general) - (internal-entdef (p/content input)) - (external-entdef (p/ext-parsed-ent input))) - (unless (eq (peek-token input) :eof) - (wf-error input "Trailing garbage. - ~S" - (peek-token input)))))) - (p/content input)))) - ((: Update of /project/cxml/cvsroot/cxml/klacks In directory clnet:/tmp/cvs-serv25228 Modified Files: klacks.lisp package.lisp Log Message: Minor klacks enhancements. klacks/klacks.lisp (CONSUME-CHARACTERS): New function. klacks/package.lisp (KLACKS): Export dribble-handler and consume-characters. --- /project/cxml/cvsroot/cxml/klacks/klacks.lisp 2007/05/01 18:21:41 1.8 +++ /project/cxml/cvsroot/cxml/klacks/klacks.lisp 2007/10/03 15:22:57 1.9 @@ -79,6 +79,12 @@ (check-type key (member :characters)) characters)) +(defun klacks:consume-characters (source) + (with-output-to-string (s) + (while (eq (klacks:peek source) :characters) + (write-string (klacks:current-characters source) s) + (klacks:consume source)))) + (defun klacks:serialize-event (source handler &key (consume t)) (multiple-value-bind (key a b c) (klacks:peek source) (let ((result nil)) --- /project/cxml/cvsroot/cxml/klacks/package.lisp 2007/05/01 18:21:41 1.6 +++ /project/cxml/cvsroot/cxml/klacks/package.lisp 2007/10/03 15:22:57 1.7 @@ -21,8 +21,10 @@ (:export #:source #:close-source #:with-open-source + #:tapping-source #:make-tapping-source + #:dribble-handler #:peek #:peek-value @@ -42,6 +44,7 @@ #:current-lname #:current-qname #:current-characters + #:consume-characters #:current-cdata-section-p #:map-current-namespace-declarations From dlichteblau at common-lisp.net Wed Oct 3 15:23:40 2007 From: dlichteblau at common-lisp.net (dlichteblau) Date: Wed, 3 Oct 2007 11:23:40 -0400 (EDT) Subject: [cxml-cvs] CVS cxml/doc Message-ID: <20071003152340.842514322A@common-lisp.net> Update of /project/cxml/cvsroot/cxml/doc In directory clnet:/tmp/cvs-serv25288/doc Modified Files: index.xml Log Message: web page update --- /project/cxml/cvsroot/cxml/doc/index.xml 2007/08/29 12:11:17 1.17 +++ /project/cxml/cvsroot/cxml/doc/index.xml 2007/10/03 15:23:40 1.18 @@ -53,8 +53,18 @@

Recent Changes

rel-2007-xx-yy

rel-2007-08-05