[cmucl-cvs] CMUCL commit: src (4 files)

Fri Jun 10 17:38:27 UTC 2011

Date: Friday, June 10, 2011 @ 10:38:27
  Author: rtoy
    Path: /project/cmucl/cvsroot/src

Modified: code/exports.lisp code/print.lisp code/unidata.lisp
          general-info/release-20c.txt

Add function to load all unicode data into memory.

This makes it easy to make an executable image that doesn't need
unidata.bin around.  (Should we do this for normal cores?  It seems to
add about 1 MB to the core size.)

code/unidata.lisp:
o Add LOAD-ALL-UNICODE-DATA to load all unicode data.
o Add UNICODE-DATA-LOADED-P to check that unicode data has been
  loaded.

code/print.lisp:
o If unicode data is loaded, don't check for existence of
  *unidata-path*, because we don't need it.

code/exports.lisp:
o Export LOAD-ALL-UNICODE-DATA.

general-info/release-20c.txt:
o Update info


------------------------------+
 code/exports.lisp            |    5 +
 code/print.lisp              |   11 ++--
 code/unidata.lisp            |  105 +++++++++++++++++++++++++++++------------
 general-info/release-20c.txt |    3 +
 4 files changed, 88 insertions(+), 36 deletions(-)


Index: src/code/exports.lisp
diff -u src/code/exports.lisp:1.304 src/code/exports.lisp:1.305

--- src/code/exports.lisp:1.304	Wed Feb  2 04:51:27 2011
+++ src/code/exports.lisp	Fri Jun 10 10:38:27 2011
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/exports.lisp,v 1.304 2011/02/02 12:51:27 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/exports.lisp,v 1.305 2011/06/10 17:38:27 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -910,7 +910,8 @@
   #+unicode
   (:export "STRING-TO-NFC" "STRING-TO-NFD"
 	   "STRING-TO-NFKC" "STRING-TO-NFKD"
-	   "UNICODE-COMPLETE" "UNICODE-COMPLETE-NAME"))
+	   "UNICODE-COMPLETE" "UNICODE-COMPLETE-NAME"
+	   "LOAD-ALL-UNICODE-DATA"))
 
 (defpackage "EVAL"
   (:export "*EVAL-STACK-TRACE*" "*INTERNAL-APPLY-NODE-TRACE*"
Index: src/code/print.lisp
diff -u src/code/print.lisp:1.132 src/code/print.lisp:1.133
--- src/code/print.lisp:1.132	Tue May 31 06:26:40 2011
+++ src/code/print.lisp	Fri Jun 10 10:38:27 2011
@@ -5,7 +5,7 @@
 ;;; Carnegie Mellon University, and has been placed in the public domain.
 ;;;
 (ext:file-comment
-  "$Header: /project/cmucl/cvsroot/src/code/print.lisp,v 1.132 2011/05/31 13:26:40 rtoy Exp $")
+  "$Header: /project/cmucl/cvsroot/src/code/print.lisp,v 1.133 2011/06/10 17:38:27 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -2164,10 +2164,11 @@
 ;;; is initialized.
 #+unicode
 (defun reinit-char-attributes ()
-  (unless (probe-file *unidata-path*)
-    (cerror _"Continue anyway" _"Cannot find ~S, so unicode support is not available"
-	    *unidata-path*)
-    (return-from reinit-char-attributes nil))
+  (unless (unicode-data-loaded-p)
+    (unless (probe-file *unidata-path*)
+      (cerror _"Continue anyway" _"Cannot find ~S, so unicode support is not available"
+	      *unidata-path*)
+      (return-from reinit-char-attributes nil)))
   (flet ((set-bit (char bit)
 	   (let ((code (char-code char)))
 	     (setf (aref character-attributes code)
Index: src/code/unidata.lisp
diff -u src/code/unidata.lisp:1.26 src/code/unidata.lisp:1.27
--- src/code/unidata.lisp:1.26	Tue May 31 06:26:40 2011
+++ src/code/unidata.lisp	Fri Jun 10 10:38:27 2011
@@ -4,7 +4,7 @@
 ;;; This code was written by Paul Foley and has been placed in the public
 ;;; domain.
 ;;; 
-(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.26 2011/05/31 13:26:40 rtoy Exp $")
+(ext:file-comment "$Header: /project/cmucl/cvsroot/src/code/unidata.lisp,v 1.27 2011/06/10 17:38:27 rtoy Exp $")
 ;;;
 ;;; **********************************************************************
 ;;;
@@ -14,11 +14,12 @@
 (intl:textdomain "cmucl")
 
 (export '(string-to-nfd string-to-nfkc string-to-nfkd string-to-nfc
-	  unicode-complete unicode-complete-name))
+	  unicode-complete unicode-complete-name
+	  load-all-unicode-data))
 
 (defvar *unidata-path* "ext-formats:unidata.bin")
 
-(defvar *unidata-version* "$Revision: 1.26 $")
+(defvar *unidata-version* "$Revision: 1.27 $")
 
 (defstruct unidata
   range
@@ -473,33 +474,38 @@
     (let ((n (read32 stream)))
       (and (plusp n) (file-position stream n)))))
 
+;; List of all defined defloaders
+(defvar *defloaders* nil)
+
 (defmacro defloader (name (stm locn) &body body)
-  `(defun ,name ()
-     (labels ((read16 (stm)
-		(logior (ash (read-byte stm) 8) (read-byte stm)))
-	      (read32 (stm)
-		(logior (ash (read16 stm) 16) (read16 stm)))
-	      (read-ntrie (bits stm)
-		(let* ((split (read-byte stm))
-		       (hlen (read16 stm))
-		       (mlen (read16 stm))
-		       (llen (read16 stm))
-		       (hvec (make-array hlen
-				   :element-type '(unsigned-byte 16)))
-		       (mvec (make-array mlen
-				   :element-type '(unsigned-byte 16)))
-		       (lvec (make-array llen
-				   :element-type (list 'unsigned-byte bits))))
-		  (read-vector hvec stm :endian-swap :network-order)
-		  (read-vector mvec stm :endian-swap :network-order)
-		  (read-vector lvec stm :endian-swap :network-order)
-		  (values split hvec mvec lvec))))
-       (declare (ignorable #'read16 #'read32 #'read-ntrie))
-       (with-open-file (,stm *unidata-path* :direction :input
-			     :element-type '(unsigned-byte 8))
-	 (unless (unidata-locate ,stm ,locn)
-	   (error (intl:gettext "No data in file.")))
-	 , at body))))
+  `(progn
+     (push ',name *defloaders*)
+     (defun ,name ()
+       (labels ((read16 (stm)
+		  (logior (ash (read-byte stm) 8) (read-byte stm)))
+		(read32 (stm)
+		  (logior (ash (read16 stm) 16) (read16 stm)))
+		(read-ntrie (bits stm)
+		  (let* ((split (read-byte stm))
+			 (hlen (read16 stm))
+			 (mlen (read16 stm))
+			 (llen (read16 stm))
+			 (hvec (make-array hlen
+					   :element-type '(unsigned-byte 16)))
+			 (mvec (make-array mlen
+					   :element-type '(unsigned-byte 16)))
+			 (lvec (make-array llen
+					   :element-type (list 'unsigned-byte bits))))
+		    (read-vector hvec stm :endian-swap :network-order)
+		    (read-vector mvec stm :endian-swap :network-order)
+		    (read-vector lvec stm :endian-swap :network-order)
+		    (values split hvec mvec lvec))))
+	 (declare (ignorable #'read16 #'read32 #'read-ntrie))
+	 (with-open-file (,stm *unidata-path* :direction :input
+			       :element-type '(unsigned-byte 8))
+	   (unless (unidata-locate ,stm ,locn)
+	     (error (intl:gettext "No data in file.")))
+	   , at body)))))
 
 (defloader load-range (stm 0)
   (let* ((n (read32 stm))
@@ -1572,3 +1578,44 @@
 	    (incf p (length code))
 	    (return)))))
     (nreverse (coerce res 'vector))))
+
+;; This is primarily intended for users who what to create a core
+;; image that contains all of the unicode data.  By doing this, the
+;; resulting image no longer needs unidata.bin anymore.  This is
+;; useful for an executable image.
+(defun load-all-unicode-data ()
+  "Load all unicode data and set *UNIDATA-PATH* to NIL.
+Normally, the unicode data is loaded as needed.  This loads all of the
+data, which is useful for creating a core that no longer needs
+unidata.bin."
+  (dolist (loader (reverse *defloaders*))
+    (funcall loader))
+  t)
+
+;; CHeck to see if all of the unicode data has been loaded.
+(defun unicode-data-loaded-p ()
+  ;; FIXME: Would be nice to be able to do this automatically from the
+  ;; structure without having to list every slot here.
+  (and (unidata-range *unicode-data*)
+       (unidata-name+ *unicode-data*)
+       (unidata-name *unicode-data*)
+       (unidata-category *unicode-data*)
+       (unidata-scase *unicode-data*)
+       (unidata-numeric *unicode-data*)
+       (unidata-decomp *unicode-data*)
+       (unidata-combining *unicode-data*)
+       (unidata-bidi *unicode-data*)
+       (unidata-name1+ *unicode-data*)
+       (unidata-name1 *unicode-data*)
+       (unidata-qc-nfd *unicode-data*)
+       (unidata-qc-nfkd *unicode-data*)
+       (unidata-qc-nfc *unicode-data*)
+       (unidata-qc-nfkc *unicode-data*)
+       (unidata-comp-exclusions *unicode-data*)
+       (unidata-full-case-lower *unicode-data*)
+       (unidata-full-case-title *unicode-data*)
+       (unidata-full-case-upper *unicode-data*)
+       (unidata-case-fold-simple *unicode-data*)
+       (unidata-case-fold-full *unicode-data*)
+       (unidata-word-break *unicode-data*)
+       t))
Index: src/general-info/release-20c.txt
diff -u src/general-info/release-20c.txt:1.22 src/general-info/release-20c.txt:1.23
--- src/general-info/release-20c.txt:1.22	Sun Jun  5 13:39:04 2011
+++ src/general-info/release-20c.txt	Fri Jun 10 10:38:27 2011
@@ -51,6 +51,9 @@
     - Added -unidata command line option to allow user to specify the
       unidata.bin file to be used instead of the default one.
     - :CMUCL is now in *FEATURES*.
+    - Add LISP:LOAD-ALL-UNICODE-DATA to load all the Unicode
+      information into core.  This is useful for creating an
+      executable image that does not need unidata.bin.
 
   * ANSI compliance fixes:
     - Fixes for signaling errors with READ-CHAR and READ-BYTE