[climacs-cvs] CVS update: climacs/prolog-syntax.lisp

Christophe Rhodes crhodes at common-lisp.net
Sat Apr 2 22:13:43 UTC 2005


Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv8882

Modified Files:
	prolog-syntax.lisp 
Log Message:
Prolog syntax improvements:

* PRINT-OBJECT for PROLOG-LEXEMEs, for less pain while debugging;

* OPEN-CT production rule from OPEN-CT-LEXEME, because we can;

* rework the lexer a bit.  Now UPDATE-SYNTAX just invalidates the lex
  as it invalidates the parse, and UPDATE-SYNTAX-FOR-DISPLAY relexes as 
  far as it needs to;

* we need operator-compound-lterm and subclasses, because we cannot
  create multiple nonterminals from one rule: returning
    (make-instance 'lterm :term (make-instance 'foo ...))
  from a production rule leaves some slots in the FOO unfilled;

* note my own bafflement as to why an apparently infinitely-recursive
  production doesn't recurse infinitely.  It can be fixed when needed,
  but why isn't it triggering?

This version still gets various aspects of multiline lexemes wrong, but 
it's a lot better than before.

Date: Sun Apr  3 00:13:27 2005
Author: crhodes

Index: climacs/prolog-syntax.lisp
diff -u climacs/prolog-syntax.lisp:1.5 climacs/prolog-syntax.lisp:1.6
--- climacs/prolog-syntax.lisp:1.5	Thu Mar 31 12:16:23 2005
+++ climacs/prolog-syntax.lisp	Sun Apr  3 00:13:26 2005
@@ -65,6 +65,9 @@
 
 (defclass prolog-lexeme (prolog-token)
   ((state :initarg :state)))
+(defmethod print-object ((o prolog-lexeme) s)
+  (print-unreadable-object (o s :type t)
+    (format s (lexeme-string o))))
 
 (defclass start-lexeme (prolog-lexeme) ())
 
@@ -127,13 +130,25 @@
   
   (def (error)))
 
+;;; open-ct is a special case: by 6.5.1 it cannot be preceded by
+;;; layout text.  We could elide this and its grammar rules, but this
+;;; way we get a clearer relationship between the standard and its
+;;; expression here.
+(defclass open-ct (prolog-nonterminal)
+  ((syntactic-lexeme :initarg :syntactic-lexeme :accessor syntactic-lexeme)))
+(defmethod display-parse-tree ((entity open-ct) (syntax prolog-syntax) pane)
+  (display-parse-tree (syntactic-lexeme entity) syntax pane))
+(define-prolog-rule (open-ct -> (open-ct-lexeme))
+  (make-instance 'open-ct :syntactic-lexeme open-ct-lexeme))
+
 ;;; 6.4.1
 (define-prolog-rule (layout-text -> (comment-lexeme layout-text))
   (make-instance 'layout-text :comment comment-lexeme :cont layout-text))
 (define-prolog-rule (layout-text -> ())
   (make-instance 'layout-text :cont nil))
 
-(defclass prolog-lexer (incremental-lexer) ())
+(defclass prolog-lexer (incremental-lexer)
+  ((valid-lex :initarg :valid-lex :accessor valid-lex :initform 1)))
 
 (defmethod next-lexeme ((lexer prolog-lexer) scan)
   (let ((string (make-array 0 :element-type 'character
@@ -180,10 +195,10 @@
 	       (t (fo) (return (make-instance 'error-lexeme))))
 	   IDENTIFIER
 	     (loop until (end-of-buffer-p scan)
-		while (let ((object (object-after scan)))
-                        (or (alphanumericp object)
-                            (eql object #\_)))
-		do (fo))
+		   while (let ((object (object-after scan)))
+			   (or (alphanumericp object)
+			       (eql object #\_)))
+		   do (fo))
 	     (return (make-instance 'identifier-lexeme))
            LINE-COMMENT
              (loop until (end-of-buffer-p scan)
@@ -429,7 +444,6 @@
 (defclass atom (prolog-nonterminal)
   ((value :initarg :value :accessor value)))
 (defmethod syntactic-lexeme ((thing atom))
-  ;; FIXME: wrong for empty-list atom and curly-brackets atom
   (syntactic-lexeme (value thing)))
 (defclass empty-list (prolog-nonterminal)
   (([ :initarg :[ :accessor [)
@@ -484,6 +498,42 @@
 (defmethod display-parse-tree ((entity lterm) (syntax prolog-syntax) pane)
   (display-parse-tree (term entity) syntax pane))
 
+;;; FIXME: the need for these is because it is a protocol violation to
+;;; create nested nonterminals from one rule.
+(defclass operator-compound-lterm (lterm)
+  ((operator :initarg :operator :accessor operator)))
+(defmethod compound-term-p ((l operator-compound-lterm))
+  t)
+(defmethod functor ((l operator-compound-lterm))
+  (operator l))
+(defclass binary-operator-compound-lterm (operator-compound-lterm)
+  ((left :initarg :left :accessor left)
+   (right :initarg :right :accessor right)))
+(defmethod arity ((l binary-operator-compound-lterm))
+  2)
+(defclass prefix-operator-compound-lterm (operator-compound-lterm)
+  ((right :initarg :right :accessor right)))
+(defmethod arity ((l prefix-operator-compound-lterm))
+  1)
+(defclass postfix-operator-compound-lterm (operator-compound-lterm)
+  ((left :initarg :left :accessor left)))
+(defmethod arity ((l postfix-operator-compound-lterm))
+  1)
+
+(defmethod display-parse-tree
+    ((entity binary-operator-compound-lterm) (syntax prolog-syntax) pane)
+  (display-parse-tree (left entity) syntax pane)
+  (display-parse-tree (operator entity) syntax pane)
+  (display-parse-tree (right entity) syntax pane))
+(defmethod display-parse-tree
+    ((entity prefix-operator-compound-lterm) (syntax prolog-syntax) pane)
+  (display-parse-tree (operator entity) syntax pane)
+  (display-parse-tree (right entity) syntax pane))
+(defmethod display-parse-tree
+    ((entity postfix-operator-compound-lterm) (syntax prolog-syntax) pane)
+  (display-parse-tree (left entity) syntax pane)
+  (display-parse-tree (operator entity) syntax pane))
+
 (defclass op (prolog-nonterminal)
   ((name :initarg :name :accessor name)
    (priority :initarg :priority :accessor priority)
@@ -579,9 +629,9 @@
   (make-instance 'variable-term :priority 0 :name variable))
 
 ;;; 6.3.3
-(define-prolog-rule (term -> (atom open-ct-lexeme arg-list close))
+(define-prolog-rule (term -> (atom open-ct arg-list close))
   (make-instance 'functional-compound-term :priority 0 :functor atom
-                 :arg-list arg-list :open-ct open-ct-lexeme :close close))
+                 :arg-list arg-list :open-ct open-ct :close close))
 (define-prolog-rule (arg-list -> (exp))
   (make-instance 'arg-list :exp exp))
 (define-prolog-rule (arg-list -> (exp comma arg-list))
@@ -613,17 +663,21 @@
 ;;; term would be, by explicitly writing the second production rule
 ;;; out here, and by using inegality tests rather than equalities for
 ;;; priorities elsewhere.  LTERMs act as containers for terms.
+;;;
+;;; FIXME: why on earth doesn't this cause infinite recursion?  If
+;;; LTERM is a subtype of TERM, as it is, this rule should surely be
+;;; always applicable.
 (define-prolog-rule (lterm -> (term))
   (make-instance 'lterm :term term :priority (1+ (priority term))))
 
 (define-prolog-rule (term -> (open (term (<= (priority term) 1201)) close))
   (make-instance 'bracketed-term :priority 0
                  :open open :term term :close close))
-(define-prolog-rule (term -> (open-ct-lexeme
+(define-prolog-rule (term -> (open-ct
                               (term (<= (priority term) 1201))
                               close))
   (make-instance 'bracketed-term :priority 0
-                 :open open-ct-lexeme :term term :close close))
+                 :open open-ct :term term :close close))
 
 ;;; 6.3.4.2
 ;;;
@@ -636,17 +690,15 @@
                                (right term)))
   (when (and (< (priority left) (priority op))
              (< (priority right) (priority op)))
-    (make-instance 'lterm :priority (priority op) :term
-                   (make-instance 'binary-operator-compound-term
-                                  :left left :operator op :right right))))
+    (make-instance 'binary-operator-compound-lterm :priority (priority op)
+		   :left left :operator op :right right)))
 (define-prolog-rule (lterm -> ((left lterm)
                                (op (eql (specifier op) :yfx))
                                (right term)))
   (when (and (<= (priority left) (priority op))
              (< (priority right) (priority op)))
-    (make-instance 'lterm :priority (priority op) :term
-                   (make-instance 'binary-operator-compound-term
-                                  :left left :operator op :right right))))
+    (make-instance 'binary-operator-compound-lterm :priority (priority op)
+		   :left left :operator op :right right)))
 (define-prolog-rule (term -> ((left term)
                               (op (eql (specifier op) :xfy))
                               (right term)))
@@ -656,14 +708,12 @@
                    :left left :operator op :right right)))
 (define-prolog-rule (lterm -> (lterm (op (eql (specifier op) :yf))))
   (when (<= (priority lterm) (priority op))
-    (make-instance 'lterm :priority (priority op) :term
-                   (make-instance 'postfix-operator-compound-term
-                                  :left lterm :operator op))))
+    (make-instance 'postfix-operator-compound-lterm :priority (priority op)
+		   :left lterm :operator op)))
 (define-prolog-rule (lterm -> (term (op (eql (specifier op) :xf))))
   (when (< (priority term) (priority op))
-    (make-instance 'lterm :priority (priority op) :term
-                   (make-instance 'postfix-operator-compound-term
-                                  :left term :operator op))))
+    (make-instance 'postfix-operator-compound-lterm :priority (priority op)
+		   :left term :operator op)))
 (define-prolog-rule (term -> ((op (eql (specifier op) :fy)) term))
   (when (and (or (not (string= (lexeme-string (syntactic-lexeme op)) "-"))
                  (not (numeric-constant-p term)))
@@ -676,9 +726,8 @@
                  (not (numeric-constant-p term)))
              (not (typep (first-lexeme term) 'open-ct-lexeme))
              (< (priority term) (priority op)))
-    (make-instance 'lterm :priority (priority op) :term
-                   (make-instance 'prefix-operator-compound-term
-                                  :right term :operator op))))
+    (make-instance 'prefix-operator-compound-lterm :priority (priority op)
+		   :right term :operator op)))
 
 ;;; 6.3.4.3
 (macrolet ((def (class &rest specifiers)
@@ -782,7 +831,7 @@
              (and (consp value)
                   (typep (car value) 'atom)
                   (typep (cadr value) 'integer))))))
-  
+
 (defun first-lexeme (thing)
   ;; FIXME: we'll need to implement this.
   (declare (ignore thing))
@@ -792,25 +841,66 @@
 
 (defmethod update-syntax-for-display (buffer (syntax prolog-syntax) top bot)
   (with-slots (parser lexer valid-parse) syntax
-     (loop until (= valid-parse (nb-lexemes lexer))
-	   while (mark<= (end-offset (lexeme lexer valid-parse)) bot)
-	   do (let ((current-token (lexeme lexer (1- valid-parse)))
-		    (next-lexeme (lexeme lexer valid-parse)))
-		(setf (slot-value next-lexeme 'state)
-		      (advance-parse parser (list next-lexeme) (slot-value current-token 'state))))
-	      (incf valid-parse))))
+    (with-slots (climacs-syntax::lexemes valid-lex) lexer
+      (let ((scan (clone-mark (low-mark buffer) :left)))
+        (setf (offset scan)
+              (end-offset (lexeme lexer (1- valid-lex))))
+	;; lex as far as we need.  We actually win quite a lot if we
+	;; can implement the splicing described in the FIXME note,
+	;; below, because there's then a good chance that CLIM's
+	;; incremental redisplay will Do The Right Thing (on the EQ
+	;; lexemes)
+        (loop do (skip-inter-lexeme-objects lexer scan)
+	      ;; FIXME: are we allowed to mix DO and UNTIL like this?
+	      ;; I doubt it.
+              until (end-of-buffer-p scan)
+	      until (mark< bot (start-offset (lexeme lexer (1- valid-lex))))
+	      ;; FIXME: a further criterion is when scan matches the
+	      ;; start-offset of an element in lexemes, at which point
+	      ;; we know that the entirety of the rest of the old lex
+	      ;; is valid without doing any further work.
+	      do (let* ((start-mark (clone-mark scan))
+			(lexeme (next-lexeme lexer scan))
+			(size (- (offset scan) (offset start-mark))))
+		   (setf (slot-value lexeme 'climacs-syntax::start-mark) start-mark
+			 (slot-value lexeme 'climacs-syntax::size) size)
+		   (insert-lexeme lexer valid-lex lexeme)
+		   (incf valid-lex)))
+	;; remove lexemes which we know to be invalid
+	(let ((end (end-offset (lexeme lexer (1- valid-lex)))))
+	  (loop until (= (nb-lexemes lexer) valid-lex)
+		while (< (start-offset (lexeme lexer valid-lex)) end)
+		do (delete* climacs-syntax::lexemes valid-lex))))
+      ;; parse up to the limit of validity imposed by the lexer, or
+      ;; the bottom of the visible area
+      (loop until (= valid-parse valid-lex)
+	    while (mark<= (end-offset (lexeme lexer valid-parse)) bot)
+	    do (let ((current-token (lexeme lexer (1- valid-parse)))
+		     (next-lexeme (lexeme lexer valid-parse)))
+		 (setf (slot-value next-lexeme 'state)
+		       (advance-parse parser (list next-lexeme) 
+				      (slot-value current-token 'state)))
+		 (incf valid-parse))))))
 
 (defmethod inter-lexeme-object-p ((lexer prolog-lexer) object)
   (member object '(#\Space #\Newline)))
 
 (defmethod update-syntax (buffer (syntax prolog-syntax))
   (with-slots (lexer valid-parse) syntax
-     (let* ((low-mark (low-mark buffer))
-	    (high-mark (high-mark buffer)))
-       (when (mark<= low-mark high-mark)
-	 (let ((first-invalid-position (delete-invalid-lexemes lexer low-mark high-mark)))
-	   (setf valid-parse first-invalid-position)
-	   (update-lex lexer first-invalid-position high-mark))))))
+    (let* ((low-mark (low-mark buffer))
+	   (high-mark (high-mark buffer)))
+      (when (mark<= low-mark high-mark)
+	(with-slots (climacs-syntax::lexemes valid-lex) lexer
+	  (let ((start 1)
+		(end (nb-elements climacs-syntax::lexemes)))
+	    (loop while (< start end)
+		  do (let ((middle (floor (+ start end) 2)))
+		       (if (mark< (end-offset (element* climacs-syntax::lexemes middle))
+				  low-mark)
+			   (setf start (1+ middle))
+			   (setf end middle))))
+	    (setf valid-lex start)
+	    (setf valid-parse start)))))))
 
 ;;; display
 
@@ -866,10 +956,7 @@
 		  'string
 		  :stream pane))))
 
-;;; KLUDGE: below this line, this is just s/html/prolog/ on the
-;;; definitions in html-syntax.lisp
-
-(defmethod display-parse-tree :before ((entity prolog-token) (syntax prolog-syntax) pane)
+(defmethod display-parse-tree :before ((entity prolog-lexeme) (syntax prolog-syntax) pane)
   (handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
   (setf *white-space-start* (end-offset entity)))
 
@@ -888,13 +975,16 @@
 	(display-parse-stack (parse-stack-symbol top) top syntax pane)
 	(display-parse-tree (target-parse-tree state) syntax pane))))
 
+(defun nb-valid-lexemes (lexer)
+  (slot-value lexer 'valid-lex))
+
 (defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax prolog-syntax) current-p)
   (with-slots (top bot) pane
      (setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
 	   *current-line* 0
 	   (aref *cursor-positions* 0) (stream-cursor-position pane))
      (with-slots (lexer) syntax
-	(let ((average-token-size (max (float (/ (size (buffer pane)) (nb-lexemes lexer)))
+	(let ((average-token-size (max (float (/ (size (buffer pane)) (nb-valid-lexemes lexer)))
 				       1.0)))
 	  ;; find the last token before bot
 	  (let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
@@ -902,7 +992,7 @@
 	    (loop until (mark<= (end-offset (lexeme lexer (1- end-token-index))) bot)
 		  do (decf end-token-index))
 	    ;; go forward to the last token before bot
-	    (loop until (or (= end-token-index (nb-lexemes lexer))
+	    (loop until (or (= end-token-index (nb-valid-lexemes lexer))
 			    (mark> (start-offset (lexeme lexer end-token-index)) bot))
 		  do (incf end-token-index))
 	    (let ((start-token-index end-token-index))




More information about the Climacs-cvs mailing list