[climacs-cvs] CVS update: climacs/prolog-syntax.lisp

Christophe Rhodes crhodes at common-lisp.net
Sat May 7 16:41:04 UTC 2005


Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv14244

Modified Files:
	prolog-syntax.lisp 
Log Message:
Improve the Prolog tokenizer.  We now recognize
  * binary constants: 0b...
  * octal constants:  0o...
  * hexadecimal constants: 0x...
  * char-code constants: 0'<quoted-char>
  * escaped characters in quoted strings:
    ** meta escapes such as \"
    ** control escapes such as \a
    ** numeric escapes such as \0177\ and \xabcd\
    ** "" (within a char-code-string) and '' (within a quoted-atom)

Date: Sat May  7 18:41:03 2005
Author: crhodes

Index: climacs/prolog-syntax.lisp
diff -u climacs/prolog-syntax.lisp:1.17 climacs/prolog-syntax.lisp:1.18
--- climacs/prolog-syntax.lisp:1.17	Sun Apr 17 17:44:39 2005
+++ climacs/prolog-syntax.lisp	Sat May  7 18:41:03 2005
@@ -111,7 +111,8 @@
   
   (def (name t) identifier graphic quoted semicolon cut)
   (def (variable t) anonymous named)
-  (def (integer t))
+  (def (integer t) integer-constant character-code-constant binary-constant
+                   octal-constant hexadecimal-constant)
   (def (float-number t))
   (def (char-code-list t))
   (def (open-ct))
@@ -157,6 +158,58 @@
 	   (bo ()
 	     (vector-pop string)
 	     (backward-object scan)))
+      (macrolet ((read-quoted-char (char)
+		   `(block read-quoted-char
+		     (let ((o (object-after scan)))
+		       (tagbody
+			START
+			  (cond 
+			    ((eql o #\\) (fo) (go ESCAPE))
+			    ((eql o ,char) (fo) (go QUOTE))
+			    (t (fo) (return-from read-quoted-char t)))
+			QUOTE
+			  (if (end-of-buffer-p scan)
+			      (return-from read-quoted-char nil)
+			      (let ((o (object-after scan)))
+				(cond
+				  ((eql o ,char) (fo) (return-from read-quoted-char t))
+				  (t (return-from read-quoted-char nil)))))
+			ESCAPE
+			  (if (end-of-buffer-p scan)
+			      (return (make-instance 'error-lexeme))
+			      (let ((o (object-after scan)))
+				(cond
+				  ;; meta (6.5.5)
+				  ((position o "\\'\"`") (fo) (return-from read-quoted-char t))
+				  ;; symbolic (6.4.2.1)
+				  ((position o "abfnrtv") (fo) (return-from read-quoted-char t))
+				  ;; octal
+				  ((digit-char-p o 8) (fo)
+				   (tagbody
+				    LOOP
+				      (when (end-of-buffer-p scan)
+					(return (make-instance 'error-lexeme)))
+				      (let ((o (object-after scan)))
+					(cond
+					  ((eql o #\\) (fo) (return-from read-quoted-char t))
+					  ((digit-char-p o 8) (fo) (go LOOP))
+					  (t (return (make-instance 'error-lexeme)))))))
+				  ((eql o #\x) (fo)
+				   (if (or (end-of-buffer-p scan)
+					   (not (digit-char-p (object-after scan) 16)))
+				       (return (make-instance 'error-lexeme))
+				       (progn 
+					 (fo)
+					 (tagbody
+					  LOOP
+					    (when (end-of-buffer-p scan)
+					      (return (make-instance 'error-lexeme)))
+					    (let ((o (object-after scan)))
+					      (cond
+						((eql o #\\) (fo) (return-from read-quoted-char t))
+						((digit-char-p o 16) (fo) (go LOOP))
+						(t (return (make-instance 'error-lexeme)))))))))
+				  (t (return (make-instance 'error-lexeme)))))))))))
       (let ((object (object-after scan)))
 	(block nil
 	  (tagbody
@@ -173,6 +226,7 @@
 		(fo) (return (make-instance 'cut-lexeme)))
 	       ((eql object #\_) (fo) (go VARIABLE))
 	       ((upper-case-p object) (fo) (go NAMED-VARIABLE))
+	       ((eql object #\0) (fo) (go NUMBER-OR-INTEGER))
 	       ((digit-char-p object) (fo) (go NUMBER))
 	       ((eql object #\") (fo) (go CHAR-CODE-LIST))
 	       ((eql object #\()
@@ -243,14 +297,10 @@
 		   (return (make-instance 'end-lexeme)))
 		  (t (return (make-instance 'graphic-lexeme))))))
 	   QUOTED-TOKEN
-	     (loop until (end-of-buffer-p scan)
-		;; FIXME
-		until (eql (object-after scan) #\')
-		do (fo))
-	     (if (end-of-buffer-p scan)
-		 (return (make-instance 'error-lexeme))
-                 (progn (fo)
-                        (return (make-instance 'quoted-lexeme))))
+	     (loop named #:mu
+		   until (end-of-buffer-p scan)
+		   while (read-quoted-char #\'))
+	     (return (make-instance 'quoted-lexeme))
 	   VARIABLE
 	     (if (or (end-of-buffer-p scan)
                      (let ((object (object-after scan)))
@@ -265,20 +315,47 @@
                             (eql object #\_)))
 		do (fo))
 	     (return (make-instance 'named-lexeme))
+	   NUMBER-OR-INTEGER
+	     (if (end-of-buffer-p scan)
+		 (return (make-instance 'integer-lexeme))
+		 (let ((object (object-after scan)))
+		   (cond
+		     ((eql object #\') (fo) (go CHARACTER-CODE-CONSTANT))
+		     ((eql object #\b) (fo) (go BINARY-CONSTANT))
+		     ((eql object #\o) (fo) (go OCTAL-CONSTANT))
+		     ((eql object #\x) (fo) (go HEXADECIMAL-CONSTANT))
+		     ((digit-char-p object) (fo) (go NUMBER))
+		     ;; FIXME: floats
+		     (t (return (make-instance 'integer-lexeme))))))
+	   CHARACTER-CODE-CONSTANT
+	     (if (read-quoted-char #\')
+		 (return (make-instance 'character-code-constant-lexeme))
+		 (return (make-instance 'error-lexeme)))
+	   BINARY-CONSTANT
+	     (loop until (end-of-buffer-p scan)
+		   while (digit-char-p (object-after scan) 2)
+		   do (fo))
+	     (return (make-instance 'binary-constant-lexeme))
+	   OCTAL-CONSTANT
+	     (loop until (end-of-buffer-p scan)
+		   while (digit-char-p (object-after scan) 8)
+		   do (fo))
+	     (return (make-instance 'octal-constant-lexeme))
+	   HEXADECIMAL-CONSTANT
+	     (loop until (end-of-buffer-p scan)
+		   while (digit-char-p (object-after scan) 16)
+		   do (fo))
+	     (return (make-instance 'hexadecimal-constant-lexeme))
 	   NUMBER
 	     (loop until (end-of-buffer-p scan)
-		while (digit-char-p (object-after scan))
-		do (fo))
-	     (return (make-instance 'integer-lexeme))
+		   while (digit-char-p (object-after scan))
+		   do (fo))
+	     (return (make-instance 'integer-constant-lexeme))
 	   CHAR-CODE-LIST
-	     (loop until (end-of-buffer-p scan)
-		;; FIXME
-		until (eql (object-after scan) #\")
-		do (fo))
-	     (if (end-of-buffer-p scan)
-		 (return (make-instance 'error-lexeme))
-		 (progn (fo)
-			(return (make-instance 'char-code-list-lexeme))))))))))
+	     (loop named #:mu
+		   until (end-of-buffer-p scan)
+		   while (read-quoted-char #\"))
+	     (return (make-instance 'char-code-list-lexeme)))))))))
 
 ;;; parser
 




More information about the Climacs-cvs mailing list