[climacs-cvs] CVS update: climacs/syntax.lisp climacs/html-syntax.lisp

Robert Strandh rstrandh at common-lisp.net
Mon May 9 13:12:50 UTC 2005


Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv8821

Modified Files:
	syntax.lisp html-syntax.lisp 
Log Message:
Prediction is now done at the beginning of advance-parse, which
means the next token is available to the predictor.

Added a :predict-test to the add-rule macro making it possible to
control when prediction is reasonable.

Added :predict-test to a few rules of HTML syntax to speed up the parser. 


Date: Mon May  9 15:12:47 2005
Author: rstrandh

Index: climacs/syntax.lisp
diff -u climacs/syntax.lisp:1.48 climacs/syntax.lisp:1.49
--- climacs/syntax.lisp:1.48	Mon May  2 11:05:00 2005
+++ climacs/syntax.lisp	Mon May  9 15:12:47 2005
@@ -205,6 +205,7 @@
   ((left-hand-side :initarg :left-hand-side :reader left-hand-side)
    (right-hand-side :initarg :right-hand-side :reader right-hand-side)
    (symbols :initarg :symbols :reader symbols)
+   (predict-test :initarg :predict-test :reader predict-test)
    (number)))
 
 (defclass grammar ()
@@ -212,7 +213,7 @@
    (hash :initform (make-hash-table) :accessor hash)
    (number-of-rules :initform 0)))
 
-(defmacro grammar-rule ((left-hand-side arrow arglist &body body))
+(defmacro grammar-rule ((left-hand-side arrow arglist &body body) &key predict-test)
   (declare (ignore arrow))
   (labels ((var-of (arg)
 	     (if (symbolp arg)
@@ -244,7 +245,8 @@
 			     (symbolp (car body)))
 			 `(make-instance ',left-hand-side , at body)
 			 `(progn , at body)))
-	:symbols ,(coerce (mapcar #'sym-of arglist) 'vector))))
+	:symbols ,(coerce (mapcar #'sym-of arglist) 'vector)
+	:predict-test ,predict-test)))
 
 
 (defmacro grammar (&body body)
@@ -308,14 +310,15 @@
     (cond ((null remaining)
 	   nil)
 	  ((functionp remaining)
-	   (handle-incomplete-item (make-instance 'incomplete-item
-				      :orig-state (orig-state prev-item)
-				      :predicted-from (predicted-from prev-item)
-				      :rule (rule prev-item)
-				      :dot-position (1+ (dot-position prev-item))
-				      :parse-trees (cons parse-tree (parse-trees prev-item))
-				      :suffix remaining)
-				   orig-state to-state))
+	   (handle-incomplete-item
+	    (make-instance 'incomplete-item
+	       :orig-state (orig-state prev-item)
+	       :predicted-from (predicted-from prev-item)
+	       :rule (rule prev-item)
+	       :dot-position (1+ (dot-position prev-item))
+	       :parse-trees (cons parse-tree (parse-trees prev-item))
+	       :suffix remaining)
+	    orig-state to-state))
 	  (t
 	   (let* ((parse-trees (cons parse-tree (parse-trees prev-item)))
 		  (start (find-if-not #'null parse-trees
@@ -389,30 +392,45 @@
 	    (t (push parse-tree (gethash from-state parse-trees))
 	       (handle-parse-tree))))))
 
+(defun predict (item state tokens)
+  (dolist (rule (gethash (aref (symbols (rule item)) (dot-position item))
+			 (hash (parser-grammar (parser state)))))
+    (if (functionp (right-hand-side rule))
+	(let ((predicted-rules (slot-value state 'predicted-rules))
+	      (rule-number (slot-value rule 'number))
+	      (predict-test (predict-test rule)))
+	  (when (zerop (sbit predicted-rules rule-number))
+	    (setf (sbit predicted-rules rule-number) 1)
+	    (when (or (null predict-test)
+		      (some predict-test tokens))
+	      (handle-and-predict-incomplete-item
+	       (make-instance 'incomplete-item
+		  :orig-state state
+		  :predicted-from item
+		  :rule rule
+		  :dot-position 0
+		  :suffix (right-hand-side rule))
+	       state tokens))))
+	(potentially-handle-parse-tree (right-hand-side rule) state state)))
+  (loop for parse-tree in (gethash state (parse-trees state))
+	do (derive-and-handle-item item parse-tree state state)))
+
 (defun handle-incomplete-item (item orig-state to-state)
   (declare (optimize speed))
   (cond ((find item (the list (gethash orig-state (incomplete-items to-state)))
  	       :test #'item-equal)
 	  nil)
  	(t
- 	 (push item (gethash orig-state (incomplete-items to-state)))
-	 (dolist (rule (gethash (aref (symbols (rule item)) (dot-position item))
-				(hash (parser-grammar (parser to-state)))))
-	   (if (functionp (right-hand-side rule))
-	       (let ((predicted-rules (slot-value to-state 'predicted-rules))
-		     (rule-number (slot-value rule 'number)))
-		 (when (zerop (sbit predicted-rules rule-number))
-		   (setf (sbit predicted-rules rule-number) 1)
-		   (handle-incomplete-item (make-instance 'incomplete-item
-					      :orig-state to-state
-					      :predicted-from item
-					      :rule rule
-					      :dot-position 0
-					      :suffix (right-hand-side rule))
-					   to-state to-state)))
-	       (potentially-handle-parse-tree (right-hand-side rule) to-state to-state)))
-	 (loop for parse-tree in (gethash to-state (parse-trees to-state))
- 	       do (derive-and-handle-item item parse-tree to-state to-state)))))
+ 	 (push item (gethash orig-state (incomplete-items to-state))))))
+
+(defun handle-and-predict-incomplete-item (item state tokens)
+  (declare (optimize speed))
+  (cond ((find item (the list (gethash state (incomplete-items state)))
+ 	       :test #'item-equal)
+	  nil)
+ 	(t
+ 	 (push item (gethash state (incomplete-items state)))
+	 (predict item state tokens))))
 
 (defmethod initialize-instance :after ((parser parser) &rest args)
   (declare (ignore args))
@@ -424,13 +442,14 @@
 		      (or (subtypep (target parser) sym)
 			  (subtypep sym (target parser))))
 		(if (functionp (right-hand-side rule))
-		    (handle-incomplete-item (make-instance 'incomplete-item
-					       :orig-state initial-state
-					       :predicted-from nil
-					       :rule rule
-					       :dot-position 0
-					       :suffix (right-hand-side rule))
-					    initial-state initial-state)
+		    (handle-incomplete-item
+		     (make-instance 'incomplete-item
+			:orig-state initial-state
+			:predicted-from nil
+			:rule rule
+			:dot-position 0
+			:suffix (right-hand-side rule))
+		     initial-state initial-state)
 		    (potentially-handle-parse-tree
 		     (right-hand-side rule) initial-state initial-state))))))
 
@@ -442,6 +461,11 @@
 	  do (return parse-tree)))
 
 (defun advance-parse (parser tokens state)
+  (maphash (lambda (from-state items)
+	     (declare (ignore from-state))
+	     (dolist (item items)
+	       (predict item state tokens)))
+	   (incomplete-items state))
   (let ((new-state (make-instance 'parser-state :parser parser)))
     (loop for token in tokens 
 	  do (potentially-handle-parse-tree token state new-state))


Index: climacs/html-syntax.lisp
diff -u climacs/html-syntax.lisp:1.29 climacs/html-syntax.lisp:1.30
--- climacs/html-syntax.lisp:1.29	Mon Apr 11 08:27:13 2005
+++ climacs/html-syntax.lisp	Mon May  9 15:12:47 2005
@@ -22,6 +22,11 @@
 
 (in-package :climacs-html-syntax)
 
+(define-syntax html-syntax ("HTML" (basic-syntax))
+  ((lexer :reader lexer)
+   (valid-parse :initform 1)
+   (parser)))
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;; grammar classes
@@ -40,6 +45,31 @@
 
 (defclass html-tag (html-token) ())
 
+(defclass html-start-tag (html-tag)
+  ((start :initarg :start)
+   (name :initarg :name)
+   (attributes :initform nil :initarg :attributes)
+   (end :initarg :end)))
+
+(defmethod display-parse-tree ((entity html-start-tag) (syntax html-syntax) pane)
+  (with-slots (start name attributes end) entity
+    (display-parse-tree start syntax pane)
+    (display-parse-tree name syntax pane)
+    (unless (null attributes)
+      (display-parse-tree attributes syntax pane))
+    (display-parse-tree end syntax pane)))
+
+(defclass html-end-tag (html-tag)
+  ((start :initarg :start)
+   (name :initarg :name)
+   (end :initarg :end)))
+
+(defmethod display-parse-tree ((entity html-end-tag) (syntax html-syntax) pane)
+  (with-slots (start name attributes end) entity
+    (display-parse-tree start syntax pane)
+    (display-parse-tree name syntax pane)
+    (display-parse-tree end syntax pane)))
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;; lexer
@@ -74,19 +104,15 @@
 		 (t
 		  (fo) (make-instance 'delimiter))))))))
 
-(define-syntax html-syntax ("HTML" (basic-syntax))
-  ((lexer :reader lexer)
-   (valid-parse :initform 1)
-   (parser)))
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;; parser
 
 (defparameter *html-grammar* (grammar))
 
-(defmacro add-html-rule (rule)
-  `(add-rule (grammar-rule ,rule) *html-grammar*))
+(defmacro add-html-rule (rule &key predict-test)
+  `(add-rule (grammar-rule ,rule :predict-test ,predict-test)
+	     *html-grammar*))
 
 (defun word-is (word string)
   (string-equal (coerce (buffer-sequence (buffer word) (start-offset word) (end-offset word)) 'string)
@@ -94,23 +120,27 @@
 
 (defmacro define-start-tag (name string)
   `(progn
-     (defclass ,name (html-tag) ())
+     (defclass ,name (html-start-tag) ())
 
      (add-html-rule
       (,name -> (start-tag-start
 		 (word (and (= (end-offset start-tag-start) (start-offset word))
 			    (word-is word ,string)))
-		 (tag-end (= (end-offset word) (start-offset tag-end))))))))
+		 (tag-end (= (end-offset word) (start-offset tag-end))))
+	     :start start-tag-start :name word :end tag-end))))
 
 (defmacro define-end-tag (name string)
   `(progn
-     (defclass ,name (html-tag) ())
+     (defclass ,name (html-end-tag) ())
 
      (add-html-rule
       (,name -> (end-tag-start
 		 (word (and (= (end-offset end-tag-start) (start-offset word))
 			    (word-is word ,string)))
-		 (tag-end (= (end-offset word) (start-offset tag-end))))))))
+		 (tag-end (= (end-offset word) (start-offset tag-end))))
+	     :start end-tag-start :name word :end tag-end)
+      :predict-test (lambda (token)
+		      (typep token 'end-tag-start)))))
 
 (defmacro define-tag-pair (start-name end-name string)
   `(progn (define-start-tag ,start-name ,string)
@@ -310,7 +340,9 @@
 (defclass $inline (html-nonterminal)
   ((contents :initarg :contents)))
      
-(add-html-rule ($inline -> (inline-element) :contents inline-element))
+(add-html-rule ($inline -> (inline-element) :contents inline-element)
+	       :predict-test (lambda (token)
+			       (typep token 'start-tag-start)))
 (add-html-rule ($inline -> (word) :contents word))
 (add-html-rule ($inline -> (delimiter) :contents delimiter))
 
@@ -326,7 +358,9 @@
   ((contents :initarg :contents)))
      
 (add-html-rule ($flow -> ($inline) :contents $inline))
-(add-html-rule ($flow -> (block-level-element) :contents block-level-element))
+(add-html-rule ($flow -> (block-level-element) :contents block-level-element)
+	       :predict-test (lambda (token)
+			       (typep token 'start-tag-start)))
 
 (defmethod display-parse-tree ((entity $flow) (syntax html-syntax) pane)
   (with-slots (contents) entity
@@ -379,11 +413,7 @@
 
 (define-list <a>-attributes <a>-attribute)
 
-(defclass <a> (html-tag)
-  ((start :initarg :start)
-   (name :initarg :name)
-   (attributes :initarg :attributes)
-   (end :initarg :end)))
+(defclass <a> (html-start-tag) ())
 
 (add-html-rule (<a> -> (start-tag-start
 			(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -392,13 +422,6 @@
 			tag-end)
 		    :start start-tag-start :name word :attributes <a>-attributes :end tag-end))
 
-(defmethod display-parse-tree ((entity <a>) (syntax html-syntax) pane)
-  (with-slots (start name attributes end) entity
-    (display-parse-tree start syntax pane)
-    (display-parse-tree name syntax pane)
-    (display-parse-tree attributes syntax pane)
-    (display-parse-tree end syntax pane)))
-
 (define-end-tag </a> "a")
 
 (defclass a-element (inline-element)
@@ -431,11 +454,7 @@
 
 ;;;;;;;;;;;;;;; p element
 
-(defclass <p> (html-tag)
-  ((start :initarg :start)
-   (name :initarg :name)
-   (attributes :initarg :attributes)
-   (end :initarg :end)))
+(defclass <p> (html-start-tag) ())
 
 (add-html-rule (<p> -> (start-tag-start
 			(word (and (= (end-offset start-tag-start) (start-offset word))
@@ -444,13 +463,6 @@
 			tag-end)
 		    :start start-tag-start :name word :attributes common-attributes :end tag-end))
 
-(defmethod display-parse-tree ((entity <p>) (syntax html-syntax) pane)
-  (with-slots (start name attributes end) entity
-    (display-parse-tree start syntax pane)
-    (display-parse-tree name syntax pane)
-    (display-parse-tree attributes syntax pane)
-    (display-parse-tree end syntax pane)))
-
 (define-end-tag </p> "p")
 
 (defclass p-element (block-level-element)
@@ -469,11 +481,7 @@
 
 ;;;;;;;;;;;;;;; li element
 
-(defclass <li> (html-tag)
-  ((start :initarg :start)
-   (name :initarg :name)
-   (attributes :initarg :attributes)
-   (end :initarg :end)))
+(defclass <li> (html-start-tag) ())
 
 (add-html-rule (<li> -> (start-tag-start
 			 (word (and (= (end-offset start-tag-start) (start-offset word))
@@ -485,13 +493,6 @@
 		     :attributes common-attributes
 		     :end tag-end))
 
-(defmethod display-parse-tree ((entity <li>) (syntax html-syntax) pane)
-  (with-slots (start name attributes end) entity
-    (display-parse-tree start syntax pane)
-    (display-parse-tree name syntax pane)
-    (display-parse-tree attributes syntax pane)
-    (display-parse-tree end syntax pane)))
-
 (define-end-tag </li> "li")
 
 (defclass li-element (html-nonterminal)
@@ -513,11 +514,7 @@
 
 ;;;;;;;;;;;;;;; ul element
 
-(defclass <ul> (html-tag)
-  ((start :initarg :start)
-   (name :initarg :name)
-   (attributes :initarg :attributes)
-   (end :initarg :end)))
+(defclass <ul> (html-start-tag) ())
 
 (add-html-rule (<ul> -> (start-tag-start
 			 (word (and (= (end-offset start-tag-start) (start-offset word))
@@ -529,13 +526,6 @@
 		     :attributes common-attributes
 		     :end tag-end))
 
-(defmethod display-parse-tree ((entity <ul>) (syntax html-syntax) pane)
-  (with-slots (start name attributes end) entity
-    (display-parse-tree start syntax pane)
-    (display-parse-tree name syntax pane)
-    (display-parse-tree attributes syntax pane)
-    (display-parse-tree end syntax pane)))
-
 (define-end-tag </ul> "ul")
 
 (define-nonempty-list li-elements li-element)
@@ -624,11 +614,7 @@
 
 (define-list <html>-attributes <html>-attribute)
 
-(defclass <html> (html-tag)
-  ((start :initarg :start)
-   (name :initarg :name)
-   (attributes :initarg :attributes)
-   (end :initarg :end)))
+(defclass <html> (html-start-tag) ())
 
 (add-html-rule (<html> -> (start-tag-start
 			   (word (and (= (end-offset start-tag-start) (start-offset word))
@@ -637,13 +623,6 @@
 			   tag-end)
 		       :start start-tag-start :name word :attributes <html>-attributes :end tag-end))
 
-(defmethod display-parse-tree ((entity <html>) (syntax html-syntax) pane)
-  (with-slots (start name attributes end) entity
-    (display-parse-tree start syntax pane)
-    (display-parse-tree name syntax pane)
-    (display-parse-tree attributes syntax pane)
-    (display-parse-tree end syntax pane)))
-
 (define-end-tag </html> "html")
 
 (defclass html (html-nonterminal)
@@ -736,13 +715,14 @@
      (when (and (end-offset entity) (mark> (end-offset entity) top))
        (call-next-method))))
 
-(defmethod display-parse-tree ((entity html-token) (syntax html-syntax) pane)
+(defmethod display-parse-tree ((entity html-lexeme) (syntax html-syntax) pane)
   (flet ((cache-test (t1 t2)
-	   (and (eq t1 t2)
-		(eq (slot-value t1 'ink)
-		    (medium-ink (sheet-medium pane)))
-		(eq (slot-value t1 'face)
-		    (text-style-face (medium-text-style (sheet-medium pane)))))))
+	   (let ((result (and (eq t1 t2)
+			      (eq (slot-value t1 'ink)
+				  (medium-ink (sheet-medium pane)))
+			      (eq (slot-value t1 'face)
+				  (text-style-face (medium-text-style (sheet-medium pane)))))))
+	     result)))
     (updating-output (pane :unique-id entity
 			   :id-test #'eq
 			   :cache-value entity
@@ -761,7 +741,7 @@
   (with-drawing-options (pane :ink +green4+)
     (call-next-method)))
 
-(defmethod display-parse-tree :before ((entity html-token) (syntax html-syntax) pane)
+(defmethod display-parse-tree :before ((entity html-lexeme) (syntax html-syntax) pane)
   (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
   (setf *white-space-start* (end-offset entity)))
 




More information about the Climacs-cvs mailing list