[climacs-devel] cl-automaton -- patch to print minimal forms of regexps

Derek Peschel dpeschel at eskimo.com
Sat Sep 24 07:44:08 UTC 2005


The attached code replaces the existing print-object method for regexp
objects.  I find the existing output too full of distracting backslashes
and parentheses.  The new output uses the operator precedences from the
grammar and hopefully includes the minimum number of parentheses and
backslashes.

Please share your advice or bug reports.  The algorithm works but I had
been testing it by passing (string-regexp ...) results to an unparsing
function.  The function is now a method but I haven't tested that version yet.
The comments and formatting aren't done yet.  (I'm going on a day trip today,
Saturday, and I've run out of time.)  In order to get the backslash rules
exactly right I need to understand the exact input syntax, which I'm still
working on.

Alex: I like reading consistently formatted files, and you created the
one I'm patching, so if you have a style I'm happy to fit with it.

-- Derek
-------------- next part --------------
*** regexp.lisp	Thu Aug  4 15:07:48 2005
--- regexp.new	Sat Sep 24 00:24:26 2005
***************
*** 1,10 ****
  ;;; -*- mode: lisp -*-
  ;;; 
  ;;; (c) copyright 2005 by Aleksandar Bakic (a_bakic at yahoo.com)
! ;;; 
  
! ;;; Derived from dk.brics.automaton v1.8.1, (c) 2001-2005 by Anders M?ller
! ;;; - Some comments have been copied verbatim from the original code.
  
  ;;; Regular expressions are built from the following abstract syntax:
  
--- 1,18 ----
  ;;; -*- mode: lisp -*-
  ;;; 
  ;;; (c) copyright 2005 by Aleksandar Bakic (a_bakic at yahoo.com)
! ;;;                   and Derek Peschel (dpeschel at eskimo.com)
  
! ;;; print-object method and related functions based on code in
! ;;;
! ;;; Ramsey, Norman. Unparsing expressions with prefix and postfix operators.
! ;;;  Software -- Practice & Experience, 28(12):1327-1356, October 1998
! 
! ;;; Other contents of this file derived from dk.brics.automaton v1.8.1,
! ;;; (c) 2001-2005 by Anders M?ller - Some comments have been copied verbatim
! ;;; from the original code.
! 
! ;;;; Grammar.
  
  ;;; Regular expressions are built from the following abstract syntax:
  
***************
*** 55,62 ****
--- 63,74 ----
  ;;; n and m have the same number of digits, then the conforming
  ;;; strings must have that length (i.e. prefixed by 0's).
  
+ ;;;; Package.
+ 
  (in-package :automaton)
  
+ ;;;; Types, constants, class.
+ 
  (deftype kind ()
    '(member nil :union :concatenation :intersection :optional :repeat
      :repeat-min :repeat-minmax :complement :char :char-range :anychar :empty
***************
*** 89,94 ****
--- 101,108 ----
     (flags :initform +all+ :initarg :flags :reader flags :type flags-type)
     (pos :initform 0 :initarg :pos :accessor pos :type integer)))
  
+ ;;;; Equality predicate.
+ 
  (defun regexp-equal (r1 r2) ; for testing
    (or (eq r1 r2)
        (and (eq (kind r1) (kind r2))
***************
*** 103,108 ****
--- 117,124 ----
  	   (eql (to r1) (to r2))
  	   (eql (flags r1) (flags r2)))))
  
+ ;;;; Type conversion functions.
+ 
  (defun string-regexp (s &optional fs)
    "Returns a new regexp object corresponding to regular expression
  string S. FS is a logior or optional syntax flags."
***************
*** 160,237 ****
  	      (interval-automaton (minr r) (maxr r) (digits r))))))
      (minimize a)))
  
! (defmethod print-object ((r regexp) s)
!   (ecase (kind r)
!     (:union
!      (princ "(" s)
!      (print-object (exp1 r) s)
!      (princ "\|" s)
!      (print-object (exp2 r) s)
!      (princ ")" s))
!     (:concatenation
!      (print-object (exp1 r) s)
!      (print-object (exp2 r) s))
!     (:intersection
!      (princ "(" s)
!      (print-object (exp1 r) s)
!      (princ "&" s)
!      (print-object (exp2 r) s)
!      (princ ")" s))
!     (:optional
!      (princ "(" s)
!      (print-object (exp1 r) s)
!      (princ ")?" s))
!     (:repeat
!      (princ "(" s)
!      (print-object (exp1 r) s)
!      (princ ")*" s))
!     (:repeat-min
!      (princ "(" s)
!      (print-object (exp1 r) s)
!      (princ "){" s)
!      (princ (minr r) s)
!      (princ ",}" s))
!     (:repeat-minmax
!      (princ "(" s)
!      (print-object (exp1 r) s)
!      (princ "){" s)
!      (princ (minr r) s)
!      (princ "," s)
!      (princ (maxr r) s)
!      (princ "}" s))
!     (:complement
!      (princ "~(" s)
!      (print-object (exp1 r) s)
!      (princ ")" s))
!     (:char
!      (princ "\\" s)
!      (princ (c r) s))
!     (:char-range
!      (princ "[\\" s)
!      (princ (from r) s)
!      (princ "-\\" s)
!      (princ (to r) s)
!      (princ "]" s))
!     (:anychar
!      (princ "." s))
!     (:empty
!      (princ "#" s))
!     (:string
!      (princ "\"" s)
!      (princ (s r) s)
!      (princ "\"" s))
!     (:anystring
!      (princ "@" s))
!     (:automaton
!      (princ "<" s)
!      (princ (s r) s)
!      (princ ">" s))
!     (:interval
!      (princ "<" s)
!      (format s "~V,'0D" (digits r) (minr r))
!      (princ "-" s)
!      (format s "~V,'0D" (digits r) (maxr r))
!      (princ ">" s))))
  
  (defun more (r)
    (< (pos r) (length (text r))))
--- 176,182 ----
  	      (interval-automaton (minr r) (maxr r) (digits r))))))
      (minimize a)))
  
! ;;;; Parsing methods and helper functions.  make-regexp constructor.
  
  (defun more (r)
    (< (pos r) (length (text r))))
***************
*** 420,423 ****
  
  (defun parse-char-exp (r)
    (match r #\\)
!   (next r))
\ No newline at end of file
--- 365,759 ----
  
  (defun parse-char-exp (r)
    (match r #\\)
!   (next r))
! 
! ;;;; print-object method and helper functions.
! 
! ;;; Package-wide information about operators.
! ;;; Keys are:
! ;;;   The variants of the "kind" CLOS type (for regexp objects) that act
! ;;;    like operators
! ;;;   The package-local symbol *max-precedence-operator*, which the unparse1
! ;;;    function returns to itself as a sentinel condition
! ;;; Each key hashes to a second-level hash table.
! ;;; Second-level keys and values are:
! ;;;   :printed-representation	function taking an AST node and returning
! ;;;				a string -- must read node-specific field
! ;;;				values, so it can't take a kind
! ;;;   *max-precedence-operator* has no printed representation
! ;;;   :precedence		integer -- the integer "less than" relation
! ;;;				is equivalent to the operator "takes
! ;;;				precedence over" relation
! ;;;   :fixity			:prefix or :infix or :postfix only
! ;;;   :associativity		:left or :right or :none only
! ;;;   infix operators, and only they, have associativity entries
! ;;;   DOCS other invariants?
! (defparameter *operator-properties* (make-hash-table))
! 
! ;;; Function to declare an infix operator.  Takes kind variant symbol,
! ;;;  printed-representation function, precedence integer, and asociativity
! ;;;  symbol.  Updates *operator-properties* as a side effect.  Return value
! ;;;  should not be used.
! (defun infix-operator (kind print prec assoc)
!     (let ((ht (make-hash-table)))
!     ;; create secondary hash table
!     (setf (gethash :printed-representation ht) print)
!     (setf (gethash :precedence		   ht) prec)
!     (setf (gethash :fixity		   ht) :infix)
!     (setf (gethash :associativity	   ht) assoc)
!     (setf (gethash :arity		   ht) 2)
!     ;; add secondary hash table to main hash table
!     (setf (gethash kind *operator-properties*) ht)))
! 
! ;;; Function to declare a prefix operator.  Takes kind variant symbol,
! ;;;  printed-representation function, and precedence integer.  Updates
! ;;;  *operator-properties* as a side effect.  Return value should not be used.
! (defun prefix-operator (kind print prec)
!   (let ((ht (make-hash-table)))
!     ;; create secondary hash table
!     (setf (gethash :printed-representation ht) print)
!     (setf (gethash :precedence		   ht) prec)
!     (setf (gethash :fixity		   ht) :prefix)
!     ;; (no entry for associativity)
!     (setf (gethash :arity		   ht) 1)
!     ;; add secondary hash table to main hash table
!     (setf (gethash kind *operator-properties*) ht)))
! 
! ;;; Function to declare a postfix operator.  Takes kind variant symbol,
! ;;;  printed-representation function, and precedence integer.  Updates
! ;;;  *operator-properties* as a side effect.  Return value should not be used.
! (defun postfix-operator (kind print prec)
!   (let ((ht (make-hash-table)))
!     ;; create secondary hash table
!     (setf (gethash :printed-representation ht) print)
!     (setf (gethash :precedence		   ht) prec)
!     (setf (gethash :fixity		   ht) :postfix)
!     ;; (no entry for associativity)
!     (setf (gethash :arity		   ht) 1)
!     ;; add secondary hash table to main hash table
!     (setf (gethash kind *operator-properties*) ht)))
! 
! ;;; Printed-representation functions for some operator kinds:
! 
! (defun print-repeat-min (node)
!   (format nil "{~D,}" (minr node)))
! 
! (defun print-repeat-minmax (node)
!   (format nil "{~D,~D}" (minr node) (maxr node)))
! 
! ;;; All other kinds have anonymous printed-representation functions.
! 
! ;;; Contents of *operator-properties*.  *min-precedence* is for consistency.
! ;;;  *max-precedence* is a property of *max-precedence-operator*.
! (defparameter *min-precedence*			       0)
! (infix-operator	  :union	 (lambda (node) "|")   1 :right)
! (infix-operator	  :intersection	 (lambda (node) "&")   2 :right)
! (infix-operator	  :concatenation (lambda (node) "")    3 :right)
! (postfix-operator :optional	 (lambda (node) "?")   4)
! (postfix-operator :repeat	 (lambda (node) "*")   4)
! (postfix-operator :repeat-min	 #'print-repeat-min    4)
! (postfix-operator :repeat-minmax #'print-repeat-minmax 4)
! (prefix-operator  :complement	 (lambda (node) "~")   5)
! (defparameter *max-precedence*			       6)
! 
! ;;; Package-local *max-precedence-operator* and its properties.  The symbol
! ;;;  value must be given, shouldn't conflict with any other symbol values
! ;;;  of hash table keys, and is otherwise important.
! (defvar *max-precedence-operator* t)
! (let ((ht (make-hash-table)))
!   ;; create secondary hash table
!   ;; (no entry for printed representation) (TO DO done to prevent errors,
!   ;;  but does it impede debugging?)
!   (setf (gethash :precedence    ht) *max-precedence*)
!   (setf (gethash :fixity	      ht) :infix)
!   (setf (gethash :associativity ht) :nonassoc)
!   (setf (gethash :arity	      ht) 2)
!   ;; add secondary hash table to main hash table
!   (setf (gethash *max-precedence-operator* *operator-properties*) ht))
! 
! ;;; Predicate to determine if a kind symbol acts like an operator.
! (defun operator-p (kind)
!   (multiple-value-bind (ht ht-status) (gethash kind *operator-properties*)
!     ht-status))
! 
! ;;; Function to look up the given property of the given kind, or cause
! ;;;  an error if the property isn't set.  The kind must be an operator.
! (defun get-operator-prop (kind key)
!   (multiple-value-bind (ht ht-status) (gethash kind *operator-properties*)
!     ;; check that KIND has an entry in first-level hash table
!     (if (null ht-status)
! 	;; it doesn't -- error
! 	(error "KIND ~S not in *OPERATOR-PROPERTIES*" kind)
! 	;; it does -- the first-level value is the second-level hash table;
! 	;;  check that KEY has an entry there
! 	(multiple-value-bind (value value-status) (gethash key ht)
! 	  (if (null value-status)
! 	      ;; it doesn't -- error
! 	      (error "KEY ~S not in ~S's entry in *OPERATOR-PROPERTIES*"
! 		     key kind)
! 	      ;; it does -- return KEY's value
! 	      value)))))
! 
! ;;; Function to look up and call the printed-representation function for
! ;;;  an AST node.  Can't take a kind because the printed-representation
! ;;;  function can't take a kind either.  The node's kind must be an operator.
! (defun print-operator (node)
!   (let ((print-fun (get-operator-prop (kind node) :printed-representation)))
!     (funcall print-fun node)))
! 
! ;;; Package-wide information about leaves.
! ;;; Keys are:
! ;;;   The variants of the "kind" CLOS type (for regexp objects) that act
! ;;;    like leaves
! ;;; Each key hashes to a second-level hash table.
! ;;; The only second-level key and value is:
! ;;;   :printed-representation	function taking an AST node and returning
! ;;;				a string -- must read node-specific field
! ;;;				values, so it can't take a kind
! (defparameter *leaf-properties* (make-hash-table))
! 
! ;;; Function to declare a leaf type.  Takes kind variant symbol and
! ;;;  printed-representation function.  Updates *leaf-properties* as a
! ;;;  side effect.  Return value should not be used.
! (defun leaf (kind print)
!   (let ((ht (make-hash-table)))
!     ;; create secondary hash table
!     (setf (gethash :printed-representation ht) print)
!     ;; add secondary hash table to main hash table
!     (setf (gethash kind *leaf-properties*) ht)))
! 
! ;;; Set of characters that don't read as literals and must be printed
! ;;;  after a backslash.
! (defparameter *non-literal-chars* "\"#&*.<?@[\\{|~")
! 
! ;;; Printed-representation function for nil-kind nodes.  For future use --
! ;;;  nil has no entry in *leaf-properties* so this function can never be
! ;;;  called.
! ;;; TO DO still need to find a better syntax
! (defun print-nil (node)
!   "{nilkind}")
! 
! ;;; Printed-representation functions for some leaf kidns:
! 
! (defun print-char (node)
!   (if (find (c node) *non-literal-chars*)
!       (format nil "\\~A" (c node))
!       (format nil "~A"   (c node))))
! 
! (defun print-char-range (node)
!  ;; TO DO add backslash as with print-char -- depends on input syntax
!   (format nil "[\\~A-\\~A]" (from node) (to node)))
! 
! (defun print-string (node)
!   ;; TO DO decide how to deal with backslashes here too
!   ;; either by adding them here, because you are printing a string in quotes,
!   ;;  or by adding them outside
!   (format nil "~S" (s node)))
! 
! (defun print-automaton (node)
!   ;; TO DO what characters can appear in names?	 can the parser handle them
!   ;;  by escaping?
!   (format nil "<~A>" (s node)))
! 
! (defun print-interval (node)
!   (format nil "<~V,'0D-~V,'0D>"
! 	  (digits node) (minr node) (digits node) (maxr node)))
! 
! ;;; All other kinds have anonymous printed-representation functions.
! 
! ;;; Contents of *leaf-properties*.
! (leaf :char       #'print-char)
! (leaf :char-range #'print-char-range)
! (leaf :anychar    (lambda (node) "."))
! (leaf :empty      (lambda (node) "#"))
! (leaf :string     #'print-string)
! (leaf :anystring  (lambda (node) "@"))
! (leaf :automaton  #'print-automaton)
! (leaf :interval   #'print-interval)
! ;; TO DO add entry for nil (uninitialized objects)
! 
! ;;; Predicate to determine if a kind symbol acts like a leaf.
! (defun leaf-p (kind)
!   (multiple-value-bind (ht ht-status) (gethash kind *leaf-properties*)
!     ht-status))
! 
! ;;; Function to look up the given property of the given kind, or cause
! ;;;  an error if the property isn't set.  The kind must be a leaf.
! (defun get-leaf-prop (kind key)
!   (multiple-value-bind (ht ht-status) (gethash kind *leaf-properties*)
!     ;; check that KIND has an entry in first-level hash table
!     (if (null ht-status)
! 	;; it doesn't -- error
! 	(error "KIND ~S not in *LEAF-PROPERTIES*" kind)
! 	;; it does -- the first-level value is the second-level hash table;
! 	;;  check that KEY has an entry there
! 	(multiple-value-bind (value value-status) (gethash key ht)
! 	  (if (null value-status)
! 	      ;; it doesn't -- error
! 	      (error "KEY ~S not in ~S's entry in *LEAF-PROPERTIES*"
! 		     key kind)
! 	      ;; it does -- return KEY's value
! 	      value)))))
! 
! ;;; Function to look up and call the printed-representation function for
! ;;;  an AST node.  Can't take a kind because the printed-representation
! ;;;  function can't take a kind either.  The node's kind must be a leaf.
! (defun print-leaf (node)
!  (let ((print-fun (get-leaf-prop (kind node) :printed-representation)))
!     (funcall print-fun node)))
! 
! (defun leaf-to-image (node)
!   (list (print-leaf node)))
! 
! (defun parenthesize-image (image)
!   (list image))
! 
! (defun prefix-image (node image)
!   (append (list (print-operator node))
! 	  image))
! 
! (defun image-infix-image (image1 node image2)
!   (append image1
! 	  (list (print-operator node))
! 	  image2))
! 
! (defun image-postfix (image node)
!   (append image
! 	  (list (print-operator node))))
! 
! ;;; Ramsey's predicate "noparens", but with the sense of the return values
! ;;;  flipped to hopefully avoid confusion (p. 14).
! (defun parens-required-p (inner-kind outer-kind direction)
!   (let ((inner-prec (get-operator-prop inner-kind :precedence))
! 	(inner-fix  (get-operator-prop inner-kind :fixity))
! 	(outer-prec (get-operator-prop outer-kind :precedence))
! 	(outer-fix  (get-operator-prop outer-kind :fixity)))
! 
!     (and (> inner-prec outer-prec)
! 	 ;;(return-from parens-required-p (cons 1 nil)))
! 	 (return-from parens-required-p nil))
! 
!     (and (eq inner-fix :postfix)
! 	 (eq direction :left)
! 	 ;;(return-from parens-required-p (cons 2 nil)))
! 	 (return-from parens-required-p nil))
! 
!     (and (eq inner-fix :prefix)
! 	 (eq direction :right)
! 	 ;;(return-from parens-required-p (cons 3 nil)))
! 	 (return-from parens-required-p nil))
! 
!     (and (eq inner-fix :infix)
! 	 (eq (get-operator-prop inner-kind :associativity) :left)
! 	 (eq direction :left)
! 	 ;;(return-from parens-required-p (cons 4 (not (and (= inner-prec outer-prec)
! 	 ;;						  (eq outer-fix :infix)
! 	 ;;						  (eq (get-operator-prop outer-kind :associativity) :left))))))
! 	 (return-from parens-required-p (not (and (= inner-prec outer-prec)
! 						  (eq outer-fix :infix)
! 						  (eq (get-operator-prop outer-kind :associativity) :left)))))
! 
!     (and (eq inner-fix :infix)
! 	 (eq (get-operator-prop inner-kind :associativity) :right)
! 	 (eq direction :right)
! 	 ;;(return-from parens-required-p (cons 5 (not (and (= inner-prec outer-prec)
! 	 ;;						  (eq outer-fix :infix)
! 	 ;;						  (eq (get-operator-prop outer-kind :associativity) :right))))))
! 	 (return-from parens-required-p (not (and (= inner-prec outer-prec)
! 						  (eq outer-fix :infix)
! 						  (eq (get-operator-prop outer-kind :associativity) :right)))))
! 
!     (and (eq direction :nonassoc)
! 	 ;;(return-from parens-required-p (cons 6 (not (eq inner-fix outer-fix)))))
! 	 (return-from parens-required-p (not (eq inner-fix outer-fix))))
! 
!     ;;(return-from parens-required-p (cons 7 t))))
!     (return-from parens-required-p t)))
! 
! ;;; Ramsey's function "bracket" (p. 15).
! (defun maybe-parenthesize-image (inner-fragment outer-kind direction)
!   (let ((inner-kind (car inner-fragment))
! 	(image      (cdr inner-fragment)))
!     (if (parens-required-p inner-kind outer-kind direction)
! 	(parenthesize-image image)
! 	image)))
! 
! ;;; Helper function to make the case statement in unparse1 smaller.
! ;;; Checks a few invariants also used by the hash-table filling functions.
! (defun fixity-case (kind)
!   (if (leaf-p kind)
!       ;; leaf
!       (if (operator-p kind)
! 	  ;; both -- error
! 	  (error "Argument ~S is both a leaf and an operator" kind)
! 	  ;; leaf only -- by definition, no children
! 	  :leaf)
!       ;; not leaf
!       (if (operator-p kind)
! 	  ;; operator only
! 	  (get-operator-prop kind :fixity)
! 	  ;; neither -- error
! 	  (error "Argument ~S is neither a leaf nor an operator" kind))))
! 
! 
! ;;; Ramsey's function "unparse'" (p. 16).
! ;;; DOCS differences from paper -- This one oesn't handle n-ary or decorated nodes.
! (defun unparse1 (node)
!   (let ((op (kind node))
! 	(l  (exp1 node))
! 	(r  (exp2 node)))
!     (ecase (fixity-case op)
!       (:leaf
!        (cons *max-precedence-operator*
! 	     (leaf-to-image node)))
!       (:prefix
!        (cons op
! 	     (prefix-image node
! 			   (maybe-parenthesize-image (unparse1 l)
! 						     op
! 						     :nonassoc))))
!       (:postfix
!        (cons op
! 	     (image-postfix (maybe-parenthesize-image (unparse1 l)
! 						      op
! 						      :nonassoc)
! 			    node)))
!       (:infix
!        (cons op
! 	     (image-infix-image (maybe-parenthesize-image (unparse1 l)
! 							  op
! 							  :left)
! 				node
! 				(maybe-parenthesize-image (unparse1 r)
! 							  op
! 							  :right)))))))
! 
! ;;; DOCS type string
! ;;; Ramsey's function "flatten" (p. 14).
! (defun image-to-string (image)
!   (if (null image)          (return-from image-to-string
! 			      ""))
!   (if (stringp image)       (return-from image-to-string
! 			      image))
!   ;; otherwise, assume image is a cons cell, which could be part of
!   ;;  a single-level list or a sublist
!   (if (stringp (car image)) (return-from image-to-string
! 			      (concatenate 'string
! 					   (car image)
! 					   (image-to-string (cdr image)))))
!   ;; the car of image isn't a string, so assume it's a cons cell and
!   ;;  forms a sublist
!   (return-from image-to-string
!     (concatenate 'string
! 		 (concatenate 'string
! 			      "("
! 			      (image-to-string (car image))
! 			      ")")
! 		 (image-to-string (cdr image)))))
! 
! ;;; Ramsey's function "unparse" (p. 15).
! (defun unparse (node)
!   (image-to-string (cdr (unparse1 node))))
! 
! (defmethod print-object ((r regexp) s)
!   (princ (image-to-string (cdr (unparse1 r))) s)


More information about the climacs-devel mailing list