[climacs-cvs] CVS update: climacs/html-syntax.lisp

Robert Strandh rstrandh at common-lisp.net
Sun Mar 13 06:55:29 UTC 2005


Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv5617

Modified Files:
	html-syntax.lisp 
Log Message:
A step on the way to factoring out the incremental lexer.

Date: Sun Mar 13 07:55:28 2005
Author: rstrandh

Index: climacs/html-syntax.lisp
diff -u climacs/html-syntax.lisp:1.10 climacs/html-syntax.lisp:1.11
--- climacs/html-syntax.lisp:1.10	Fri Mar 11 11:25:58 2005
+++ climacs/html-syntax.lisp	Sun Mar 13 07:55:27 2005
@@ -41,6 +41,21 @@
   (with-slots (start-mark size) tree
      (+ (offset start-mark) size)))
 
+(defclass lexer ()
+  ((buffer :initarg :buffer :reader buffer)))
+
+(defgeneric nb-lexemes (lexer))
+(defgeneric lexeme (lexer pos))
+
+(defclass incremental-lexer (lexer)
+  ((lexemes :initform (make-instance 'standard-flexichain) :reader lexemes)))
+
+(defmethod nb-lexemes ((lexer incremental-lexer))
+  (nb-elements (lexemes lexer)))
+
+(defmethod lexeme ((lexer incremental-lexer) pos)
+  (element* (lexemes lexer) pos))
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;; grammar classes
@@ -132,30 +147,23 @@
 (defclass word (html-element) ())
 (defclass delimiter (html-element) ())
 
-(defun next-token (scan)
-  (let ((start-mark (clone-mark scan)))
-    (flet ((fo () (forward-object scan)))
-      (macrolet ((make-entry (type)
-		   `(return-from next-token
-		      (make-instance ,type :start-mark start-mark
-				     :size (- (offset scan) (offset start-mark))))))
-	(loop with object = (object-after scan)
-	      until (end-of-buffer-p scan)
-	      do (case object
-		   (#\< (fo) (make-entry 'tag-start))
-		   (#\> (fo) (make-entry 'tag-end))
-		   (#\/ (fo) (make-entry 'slash))
-		   (t (cond ((alphanumericp object)
-			     (loop until (end-of-buffer-p scan)
-				   while (alphanumericp (object-after scan))
-				   do (fo))
-			     (make-entry 'word))
-			    (t
-			     (fo) (make-entry 'delimiter))))))))))
+(defun next-lexeme (scan)
+  (flet ((fo () (forward-object scan)))
+    (let ((object (object-after scan)))
+      (case object
+	(#\< (fo) (make-instance 'tag-start))
+	(#\> (fo) (make-instance 'tag-end))
+	(#\/ (fo) (make-instance 'slash))
+	(t (cond ((alphanumericp object)
+		  (loop until (end-of-buffer-p scan)
+			while (alphanumericp (object-after scan))
+			do (fo))
+		  (make-instance 'word))
+		 (t
+		  (fo) (make-instance 'delimiter))))))))
 
 (define-syntax html-syntax ("HTML" (basic-syntax))
-  ((tokens :initform (make-instance 'standard-flexichain))
-   (guess-pos :initform 1)
+  ((lexemes :initform (make-instance 'standard-flexichain))
    (valid-parse :initform 1)
    (parser)))
 
@@ -264,11 +272,11 @@
 
 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
   (declare (ignore args))
-  (with-slots (parser tokens buffer) syntax
+  (with-slots (parser lexemes buffer) syntax
      (setf parser (make-instance 'parser
 		     :grammar *html-grammar*
 		     :target 'html))
-     (insert* tokens 0 (make-instance 'start-element
+     (insert* lexemes 0 (make-instance 'start-element
 			  :start-mark (make-instance 'standard-left-sticky-mark
 					 :buffer buffer
 					 :offset 0)
@@ -280,52 +288,65 @@
 ;;; update syntax
 
 (defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
-  (with-slots (parser tokens valid-parse) syntax
-     (loop until (= valid-parse (nb-elements tokens))
-	   while (mark<= (end-offset (element* tokens valid-parse)) bot)
-	   do (let ((current-token (element* tokens (1- valid-parse)))
-		    (next-token (element* tokens valid-parse)))
-		(setf (slot-value next-token 'state)
-		      (advance-parse parser (list next-token) (slot-value current-token 'state))))
+  (with-slots (parser lexemes valid-parse) syntax
+     (loop until (= valid-parse (nb-elements lexemes))
+	   while (mark<= (end-offset (element* lexemes valid-parse)) bot)
+	   do (let ((current-token (element* lexemes (1- valid-parse)))
+		    (next-lexeme (element* lexemes valid-parse)))
+		(setf (slot-value next-lexeme 'state)
+		      (advance-parse parser (list next-lexeme) (slot-value current-token 'state))))
 	      (incf valid-parse))))
 
+(defun delete-invalid-lexemes (lexemes from to)
+  "delete all lexemes between FROM and TO and return the first invalid 
+position in LEXEMES"
+  (let ((start 1)
+	(end (nb-elements lexemes)))
+    ;; use binary search to find the first lexeme to delete
+    (loop while (< start end)
+	  do (let ((middle (floor (+ start end) 2)))
+	       (if (mark< (end-offset (element* lexemes middle)) from)
+		   (setf start (1+ middle))
+		   (setf end middle))))
+    ;; delete lexemes
+    (loop until (or (= start (nb-elements lexemes))
+		    (mark> (start-mark (element* lexemes start)) to))
+	  do (delete* lexemes start))
+    start))
+	       
+
+(defun inter-lexeme-object-p (lexemes object)
+  (declare (ignore lexemes))
+  (whitespacep object))
+
+(defun skip-inter-lexeme-objects (lexemes scan)
+  (loop until (end-of-buffer-p scan)
+	while (inter-lexeme-object-p lexemes (object-after scan))
+	do (forward-object scan)))
+
+(defun update-lex (lexemes start-pos end)
+  (let ((scan (make-instance 'standard-left-sticky-mark
+		 :buffer (buffer end) ; FIXME, eventually use the buffer of the lexer
+		 :offset (end-offset (element* lexemes (1- start-pos))))))
+    (loop do (skip-inter-lexeme-objects lexemes scan)
+	  until (if (end-of-buffer-p end)
+		    (end-of-buffer-p scan)
+		    (mark> scan end))
+	  do (let* ((start-mark (clone-mark scan))
+		    (lexeme (next-lexeme scan))
+		    (size (- (offset scan) (offset start-mark))))
+	       (setf (slot-value lexeme 'start-mark) start-mark
+		     (slot-value lexeme 'size) size)
+	       (insert* lexemes start-pos lexeme))
+	     (incf start-pos))))
+
 (defmethod update-syntax (buffer (syntax html-syntax))
-  (let ((low-mark (low-mark buffer))
-	(high-mark (high-mark buffer))
-	(scan))
-    (with-slots (tokens guess-pos valid-parse) syntax
-       (when (mark<= low-mark high-mark)
-	 ;; go back to a position before low-mark
-	 (loop until (or (= guess-pos 1)
-			 (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
-	       do (decf guess-pos))
-	 ;; go forward to the last position before low-mark
-	 (loop with nb-elements = (nb-elements tokens)
-	       until (or (= guess-pos nb-elements)
-			 (mark>= (end-offset (element* tokens guess-pos)) low-mark))
-	       do (incf guess-pos))
-	 ;; mark valid parse
-	 (setf valid-parse guess-pos)
-	 ;; delete entries that must be reparsed
-	 (loop until (or (= guess-pos (nb-elements tokens))
-			 (mark> (start-mark (element* tokens guess-pos)) high-mark))
-	       do (delete* tokens guess-pos))
-	 (setf scan (make-instance 'standard-left-sticky-mark
-		       :buffer buffer
-		       :offset (if (zerop guess-pos)
-				   0
-				   (end-offset (element* tokens (1- guess-pos))))))
-	 ;; scan
-	 (loop with start-mark = nil
-	       do (loop until (end-of-buffer-p scan)
-			while (whitespacep (object-after scan))
-			do (forward-object scan))
-	       until (if (end-of-buffer-p high-mark)
-			 (end-of-buffer-p scan)
-			 (mark> scan high-mark))
-	       do (setf start-mark (clone-mark scan))
-		  (insert* tokens guess-pos (next-token scan))
-		  (incf guess-pos))))))
+  (with-slots (lexemes valid-parse) syntax
+     (let* ((low-mark (low-mark buffer))
+	    (high-mark (high-mark buffer))
+	    (first-invalid-position (delete-invalid-lexemes lexemes low-mark high-mark)))
+       (setf valid-parse first-invalid-position)
+       (update-lex lexemes first-invalid-position high-mark))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
@@ -451,35 +472,35 @@
      (setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
 	   *current-line* 0
 	   (aref *cursor-positions* 0) (stream-cursor-position pane))
-     (with-slots (tokens) syntax
-	(let ((average-token-size (max (float (/ (size (buffer pane)) (nb-elements tokens)))
+     (with-slots (lexemes) syntax
+	(let ((average-token-size (max (float (/ (size (buffer pane)) (nb-elements lexemes)))
 				       1.0)))
 	  ;; find the last token before bot
 	  (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
 	    ;; go back to a token before bot
-	    (loop until (mark<= (end-offset (element* tokens (1- end-token-index))) bot)
+	    (loop until (mark<= (end-offset (element* lexemes (1- end-token-index))) bot)
 		  do (decf end-token-index))
 	    ;; go forward to the last token before bot
-	    (loop until (or (= end-token-index (nb-elements tokens))
-			    (mark> (start-offset (element* tokens end-token-index)) bot))
+	    (loop until (or (= end-token-index (nb-elements lexemes))
+			    (mark> (start-offset (element* lexemes end-token-index)) bot))
 		  do (incf end-token-index))
 	    (let ((start-token-index end-token-index))
 	      ;; go back to the first token after top, or until the previous token
 	      ;; contains a valid parser state
-	      (loop until (or (mark<= (end-offset (element* tokens (1- start-token-index))) top)
+	      (loop until (or (mark<= (end-offset (element* lexemes (1- start-token-index))) top)
 			      (not (parse-state-empty-p 
-				    (slot-value (element* tokens (1- start-token-index)) 'state))))
+				    (slot-value (element* lexemes (1- start-token-index)) 'state))))
 		    do (decf start-token-index))
 	      (let ((*white-space-start* (offset top)))
 		;; display the parse tree if any
-		(unless (parse-state-empty-p (slot-value (element* tokens (1- start-token-index)) 'state))
-		  (display-parse-state (slot-value (element* tokens (1- start-token-index)) 'state)
+		(unless (parse-state-empty-p (slot-value (element* lexemes (1- start-token-index)) 'state))
+		  (display-parse-state (slot-value (element* lexemes (1- start-token-index)) 'state)
 				       syntax
 				       pane))
-		;; display the tokens
+		;; display the lexemes
 		(with-drawing-options (pane :ink +red+)
 		  (loop while (< start-token-index end-token-index)
-			do (let ((token (element* tokens start-token-index)))
+			do (let ((token (element* lexemes start-token-index)))
 			     (display-parse-tree token syntax pane))
 			   (incf start-token-index))))))))
      (let* ((cursor-line (number-of-lines-in-region top (point pane)))




More information about the Climacs-cvs mailing list