From rtoy at common-lisp.net Fri Nov 2 20:11:42 2007 From: rtoy at common-lisp.net (rtoy) Date: Fri, 2 Nov 2007 15:11:42 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-rep.lisp qd.lisp Message-ID: <20071102201142.65A0850034@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv8022 Modified Files: Tag: THREE-ARG-BRANCH qd-rep.lisp qd.lisp Log Message: First cut at adding a 3-arg versions of the basic operations to reduce consing by allowing the third argument to be a place where the result can be stored. This is intended to help reduce allocation and gc costs for Lisps that use arrays to represent quad-doubles. More work is needed to make the compiler macros do the right thing for CMUCL. qd-rep.lisp: o Add %STORE-QD-D to store a quad-double into a place. For CMUCL, there place argument is ignored and a fresh quad-double is created. qd.lisp: o Modify ADD-QD, SUB-QD, MUL-QD, and DIV-QD to take an optional third argument indicating where the result can be stored. Ignored on CMUCL. o Add ADD-QD-T, SUB-QD-T, MUL-QD-T, and DIV-QD-T, which are 3-arg functions with the third arg always required which is the storage area to hold the result. Ignored on CMUCL. o Add compiler macros to convert ADD-QD and friends to ADD-QD-T if the third arg is always given. The effect is, essentially, inlining ADD-QD. --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/10/16 17:09:46 1.10 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/02 20:11:42 1.10.2.1 @@ -81,6 +81,9 @@ (kernel:%make-double-double-float a2 a3))) ) +(defmacro %store-qd-d (target q0 q1 q2 q3) + (declare (ignore target)) + `(%make-qd-d ,q0 ,q1, q2, q3)) (defun qd-parts (qd) "Extract the four doubles comprising a quad-double and return them @@ -169,6 +172,14 @@ (setf (aref ,a 3) ,a3) ,a))) +(defmacro %store-qd-d (target q0 q1 q2 q3) + (let ((dest (gensym "TARGET-"))) + `(let ((,dest ,target)) + (setf (aref ,dest 0) ,q0) + (setf (aref ,dest 1) ,q1) + (setf (aref ,dest 2) ,q2) + (setf (aref ,dest 3) ,q3)))) + (defun qd-parts (qd) "Extract the four doubles comprising a quad-double and return them as multiple values. The most significant double is the first value." --- /project/oct/cvsroot/oct/qd.lisp 2007/10/18 14:38:11 1.60 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/02 20:11:42 1.60.2.1 @@ -179,10 +179,12 @@ make-qd-d add-qd-d add-d-qd add-qd-dd add-dd-qd - add-qd + add-qd add-qd-t neg-qd sub-qd sub-qd-dd sub-qd-d sub-d-qd - mul-qd-d mul-qd-dd mul-qd + mul-qd-d mul-qd-dd + mul-qd + mul-qd-t sqr-qd div-qd div-qd-d div-qd-dd make-qd-dd @@ -385,7 +387,11 @@ ;; which don't do a very good job with dataflow. CMUCL is one of ;; those compilers. -(defun add-qd (a b) +(defun add-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (add-qd-t a b target)) + + +(defun add-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) (space 0))) @@ -407,7 +413,7 @@ (inline float-infinity-p)) (when (float-infinity-p s0) - (return-from add-qd (%make-qd-d s0 0d0 0d0 0d0))) + (return-from add-qd-t (%store-qd-d target s0 0d0 0d0 0d0))) (let ((v0 (cl:- s0 a0)) (v1 (cl:- s1 a1)) (v2 (cl:- s2 a2)) @@ -441,8 +447,22 @@ (multiple-value-setq (s0 s1 s2 s3) (renorm-5 s0 s1 s2 s3 t0)) (if (and (zerop a0) (zerop b0)) - (%make-qd-d (+ a0 b0) 0d0 0d0 0d0) - (%make-qd-d s0 s1 s2 s3))))))))))) + (%store-qd-d target (+ a0 b0) 0d0 0d0 0d0) + (%store-qd-d target s0 s1 s2 s3))))))))))) + +;; Define some compiler macros to transform add-qd to add-qd-t +;; directly. For CMU, we always replace the parameter C with NIL +;; because we don't use it. For other Lisps, we create the necessary +;; object and call add-qd-t. +#+cmu +(define-compiler-macro add-qd (a b &optional c) + (if c + `(setf c (add-qd-t ,a ,b nil)) + `(add-qd-t ,a ,b nil))) + +#-cmu +(define-compiler-macro add-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-t ,a ,b ,c)) (defun neg-qd (a) (declare (type %quad-double a)) @@ -451,9 +471,19 @@ (declare (double-float a0 a1 a2 a3)) (%make-qd-d (cl:- a0) (cl:- a1) (cl:- a2) (cl:- a3)))) -(defun sub-qd (a b) +(defun sub-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a b)) - (add-qd a (neg-qd b))) + (add-qd-t a (neg-qd b) target)) + +#+cmu +(define-compiler-macro sub-qd (a b &optional c) + (if c + `(setf ,c `(add-qd-t ,a (neg-qd ,b) nil)) + `(add-qd-t ,a (neg-qd ,b) nil))) + +#-cmu +(define-compiler-macro sub-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-t ,a (neg-qd ,b) ,c)) #+cmu (defun sub-qd-dd (a b) @@ -602,7 +632,11 @@ ;; ;; Clisp says ;; 14.142135623730950488016887242096980785696718753769480731766797379908L0 -(defun mul-qd (a b) + +(defun mul-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (mul-qd-t a b target)) + +(defun mul-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) (space 0)) @@ -617,7 +651,7 @@ (two-prod a0 b0) #+cmu (when (float-infinity-p p0) - (return-from mul-qd (%make-qd-d p0 0d0 0d0 0d0))) + (return-from mul-qd-t (%store-qd-d target p0 0d0 0d0 0d0))) (multiple-value-bind (p1 q1) (two-prod a0 b1) (multiple-value-bind (p2 q2) @@ -662,8 +696,19 @@ (multiple-value-bind (r0 r1 s0 s1) (renorm-5 p0 p1 s0 s1 s2) (if (zerop r0) - (%make-qd-d p0 0d0 0d0 0d0) - (%make-qd-d r0 r1 s0 s1)))))))))))))) + (%store-qd-d target p0 0d0 0d0 0d0) + (%store-qd-d target r0 r1 s0 s1)))))))))))))) + +#+cmu +(define-compiler-macro mul-qd (a b &optional c) + (if c + `(setf ,c `(mul-qd-t ,a ,b nil)) + `(mul-qd-t ,a ,b nil))) + +#-cmu +(define-compiler-macro mul-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) + `(mul-qd-t ,a ,b ,c)) + ;; This is the non-sloppy version. I think this works just fine, but ;; since qd defaults to the sloppy multiplication version, we do the @@ -813,7 +858,10 @@ (%make-qd-d a0 a1 a2 a3))))))))) -(defun div-qd (a b) +(defun div-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (div-qd-t a b target)) + +(defun div-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) (space 0)) @@ -825,14 +873,25 @@ (q1 (cl:/ (qd-0 r) b0))) (when (float-infinity-p q0) - (return-from div-qd (%make-qd-d q0 0d0 0d0 0d0))) + (return-from div-qd-t (%store-qd-d target q0 0d0 0d0 0d0))) (setf r (sub-qd r (mul-qd-d b q1))) (let ((q2 (cl:/ (qd-0 r) b0))) (setf r (sub-qd r (mul-qd-d b q2))) (let ((q3 (cl:/ (qd-0 r) b0))) (multiple-value-bind (q0 q1 q2 q3) (renorm-4 q0 q1 q2 q3) - (%make-qd-d q0 q1 q2 q3))))))) + (%store-qd-d target q0 q1 q2 q3))))))) + +#+cmu +(define-compiler-macro div-qd (a b &optional c) + (if c + `(setf ,c `(div-qd-t ,a ,b nil)) + `(div-qd-t ,a ,b nil))) + +#-cmu +(define-compiler-macro div-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) + `(div-qd-t ,a ,b ,c)) + (declaim (inline invert-qd)) From rtoy at common-lisp.net Fri Nov 2 20:45:32 2007 From: rtoy at common-lisp.net (rtoy) Date: Fri, 2 Nov 2007 15:45:32 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-rep.lisp Message-ID: <20071102204532.B89FA7A001@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv14009 Modified Files: Tag: THREE-ARG-BRANCH qd-rep.lisp Log Message: Oops. %STORE-QD-D wasn't returning the target value. --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/02 20:11:42 1.10.2.1 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/02 20:45:32 1.10.2.2 @@ -178,7 +178,8 @@ (setf (aref ,dest 0) ,q0) (setf (aref ,dest 1) ,q1) (setf (aref ,dest 2) ,q2) - (setf (aref ,dest 3) ,q3)))) + (setf (aref ,dest 3) ,q3) + ,dest))) (defun qd-parts (qd) "Extract the four doubles comprising a quad-double and return them From rtoy at common-lisp.net Sun Nov 4 02:45:01 2007 From: rtoy at common-lisp.net (rtoy) Date: Sat, 3 Nov 2007 21:45:01 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-rep.lisp qd.lisp Message-ID: <20071104024501.E4C935D164@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv6695 Modified Files: Tag: THREE-ARG-BRANCH qd-rep.lisp qd.lisp Log Message: o Move compiler macros from qd.lisp to qd-rep.lisp o Declare add-qd-t, mul-qd-t and div-qd-t as inline functions so that everything is still fast on cmucl. --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/02 20:45:32 1.10.2.2 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/04 02:45:01 1.10.2.3 @@ -233,3 +233,28 @@ (declare (ignore x)) nil) ) ; end progn + + +(macrolet + ((frob (qd qd-t) + #+cmu + `(define-compiler-macro ,qd (a b &optional c) + (if c + `(setf ,c (,',qd-t ,a ,b nil)) + `(,',qd-t ,a ,b nil))) + #-cmu + `(define-compiler-macro ,qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) + `(,',qd-t ,a ,b ,c)))) + (frob add-qd add-qd-t) + (frob mul-qd mul-qd-t) + (frob div-qd div-qd-t)) + +#+cmu +(define-compiler-macro sub-qd (a b &optional c) + (if c + `(setf ,c (add-qd-t ,a (neg-qd ,b) nil)) + `(add-qd-t ,a (neg-qd ,b) nil))) + +#-cmu +(define-compiler-macro sub-qd (a b &optional c) + `(add-qd-t ,a (neg-qd ,b) ,c)) --- /project/oct/cvsroot/oct/qd.lisp 2007/11/02 20:11:42 1.60.2.1 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/04 02:45:01 1.60.2.2 @@ -161,11 +161,14 @@ add-qd-d add-qd-dd add-qd + add-qd-t mul-qd-d mul-qd-dd mul-qd + mul-qd-t sqr-qd div-qd + div-qd-t div-qd-d div-qd-dd)) @@ -187,6 +190,7 @@ mul-qd-t sqr-qd div-qd div-qd-d div-qd-dd + div-qd-t make-qd-dd )) @@ -394,7 +398,9 @@ (defun add-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) - (space 0))) + (space 0)) + #+cmu + (ignore target)) ;; This is the version that is NOT IEEE. Should we use the IEEE ;; version? It's quite a bit more complicated. ;; @@ -454,16 +460,6 @@ ;; directly. For CMU, we always replace the parameter C with NIL ;; because we don't use it. For other Lisps, we create the necessary ;; object and call add-qd-t. -#+cmu -(define-compiler-macro add-qd (a b &optional c) - (if c - `(setf c (add-qd-t ,a ,b nil)) - `(add-qd-t ,a ,b nil))) - -#-cmu -(define-compiler-macro add-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-qd-t ,a ,b ,c)) - (defun neg-qd (a) (declare (type %quad-double a)) (with-qd-parts (a0 a1 a2 a3) @@ -476,16 +472,6 @@ (add-qd-t a (neg-qd b) target)) #+cmu -(define-compiler-macro sub-qd (a b &optional c) - (if c - `(setf ,c `(add-qd-t ,a (neg-qd ,b) nil)) - `(add-qd-t ,a (neg-qd ,b) nil))) - -#-cmu -(define-compiler-macro sub-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-qd-t ,a (neg-qd ,b) ,c)) - -#+cmu (defun sub-qd-dd (a b) (declare (type %quad-double a) (type double-double-float b)) @@ -699,16 +685,6 @@ (%store-qd-d target p0 0d0 0d0 0d0) (%store-qd-d target r0 r1 s0 s1)))))))))))))) -#+cmu -(define-compiler-macro mul-qd (a b &optional c) - (if c - `(setf ,c `(mul-qd-t ,a ,b nil)) - `(mul-qd-t ,a ,b nil))) - -#-cmu -(define-compiler-macro mul-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) - `(mul-qd-t ,a ,b ,c)) - ;; This is the non-sloppy version. I think this works just fine, but ;; since qd defaults to the sloppy multiplication version, we do the @@ -865,7 +841,9 @@ (declare (type %quad-double a b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu + (ignore target)) (let ((a0 (qd-0 a)) (b0 (qd-0 b))) (let* ((q0 (cl:/ a0 b0)) @@ -882,17 +860,6 @@ (renorm-4 q0 q1 q2 q3) (%store-qd-d target q0 q1 q2 q3))))))) -#+cmu -(define-compiler-macro div-qd (a b &optional c) - (if c - `(setf ,c `(div-qd-t ,a ,b nil)) - `(div-qd-t ,a ,b nil))) - -#-cmu -(define-compiler-macro div-qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) - `(div-qd-t ,a ,b ,c)) - - (declaim (inline invert-qd)) (defun invert-qd(v) ;; a quartic newton iteration for 1/v From rtoy at common-lisp.net Sun Nov 4 02:51:39 2007 From: rtoy at common-lisp.net (rtoy) Date: Sat, 3 Nov 2007 21:51:39 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-extra.lisp Message-ID: <20071104025139.2ED571128@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv8162 Modified Files: Tag: THREE-ARG-BRANCH qd-extra.lisp Log Message: Update timing data. --- /project/oct/cvsroot/oct/qd-extra.lisp 2007/10/26 15:48:15 1.5 +++ /project/oct/cvsroot/oct/qd-extra.lisp 2007/11/04 02:51:39 1.5.2.1 @@ -1099,14 +1099,14 @@ ;; Time Sparc PPC x86 PPC (fma) Sparc2 ;; exp-qd/reduce 2.06 3.18 10.46 2.76 6.12 ;; expm1-qd/series 8.81 12.24 18.87 3.26 29.0 -;; expm1-qd/dup 5.68 4.34 18.47 3.64 18.78 -;; exp-qd/pade 1.53 +;; expm1-qd/dup 5.68 4.34 18.47 3.64 9.77 +;; exp-qd/pade 1.53 4.51 ;; ;; Consing (MB) Sparc ;; exp-qd/reduce 45 45 638 44.4 45 ;; expm1-qd/series 519 519 1201 14.8 519 ;; expm1-qd/dup 32 32 1224 32.0 32 -;; exp-qd/pade 44 +;; exp-qd/pade 44 44 ;; ;; Speeds seem to vary quite a bit between architectures. ;; From rtoy at common-lisp.net Sun Nov 4 03:00:56 2007 From: rtoy at common-lisp.net (rtoy) Date: Sat, 3 Nov 2007 22:00:56 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-rep.lisp qd.lisp Message-ID: <20071104030056.54D6C706A@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv10089 Modified Files: Tag: THREE-ARG-BRANCH qd-rep.lisp qd.lisp Log Message: o Add support for SQR-QD-T o Add compiler macro for SQR-QD. --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/04 02:45:01 1.10.2.3 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/04 03:00:56 1.10.2.4 @@ -258,3 +258,14 @@ #-cmu (define-compiler-macro sub-qd (a b &optional c) `(add-qd-t ,a (neg-qd ,b) ,c)) + +#+cmu +(define-compiler-macro sqr-qd (a &optional c) + (if c + `(setf ,c (sqr-qd-t ,a nil)) + `(sqr-qd-t ,a nil))) + +#-cmu +(define-compiler-macro sqr-qd (a b &optional c) + `(sqr-qd-t ,a ,c)) + --- /project/oct/cvsroot/oct/qd.lisp 2007/11/04 02:45:01 1.60.2.2 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/04 03:00:56 1.60.2.3 @@ -626,7 +626,9 @@ (declare (type %quad-double a b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu + (ignore target)) (with-qd-parts (a0 a1 a2 a3) a (declare (double-float a0 a1 a2 a3)) @@ -787,11 +789,16 @@ (multiple-value-call #'%make-qd-d (renorm-5 p0 p1 s0 t0 t1)))))))))))))))))))) -(defun sqr-qd (a) +(defun sqr-qd (a &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (sqr-qd-t a target)) + +(defun sqr-qd-t (a target) "Square A" (declare (type %quad-double a) (optimize (speed 3) - (space 0))) + (space 0)) + #+cmu + (ignore target)) (multiple-value-bind (p0 q0) (two-sqr (qd-0 a)) (multiple-value-bind (p1 q1) @@ -831,7 +838,7 @@ (multiple-value-bind (a0 a1 a2 a3) (renorm-5 p0 p1 p2 p3 p4) - (%make-qd-d a0 a1 a2 a3))))))))) + (%store-qd-d target a0 a1 a2 a3))))))))) (defun div-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) From rtoy at common-lisp.net Sun Nov 4 16:31:45 2007 From: rtoy at common-lisp.net (rtoy) Date: Sun, 4 Nov 2007 11:31:45 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd.lisp Message-ID: <20071104163145.5926744065@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv18778 Modified Files: Tag: THREE-ARG-BRANCH qd.lisp Log Message: Oops. Make sqr-qd-t inline. --- /project/oct/cvsroot/oct/qd.lisp 2007/11/04 03:00:56 1.60.2.3 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/04 16:31:45 1.60.2.4 @@ -167,6 +167,7 @@ mul-qd mul-qd-t sqr-qd + sqr-qd-t div-qd div-qd-t div-qd-d From rtoy at common-lisp.net Mon Nov 5 16:00:41 2007 From: rtoy at common-lisp.net (rtoy) Date: Mon, 5 Nov 2007 11:00:41 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd.lisp Message-ID: <20071105160041.4E6DC55356@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv7505 Modified Files: Tag: THREE-ARG-BRANCH qd.lisp Log Message: Cosmetic --- /project/oct/cvsroot/oct/qd.lisp 2007/11/04 16:31:45 1.60.2.4 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/05 16:00:39 1.60.2.5 @@ -870,7 +870,8 @@ (declaim (inline invert-qd)) -(defun invert-qd(v) ;; a quartic newton iteration for 1/v +(defun invert-qd (v) + ;; a quartic newton iteration for 1/v ;; to invert v, start with a good guess, x. ;; let h= 1-v*x ;; h is small ;; return x+ x*(h+h^2+h^3) . compute h3 in double-float From rtoy at common-lisp.net Mon Nov 5 16:00:53 2007 From: rtoy at common-lisp.net (rtoy) Date: Mon, 5 Nov 2007 11:00:53 -0500 (EST) Subject: [oct-cvs] Oct commit: oct timing2.lisp Message-ID: <20071105160053.1658B55395@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv7545 Modified Files: Tag: THREE-ARG-BRANCH timing2.lisp Log Message: Update timings. --- /project/oct/cvsroot/oct/timing2.lisp 2007/10/16 14:21:13 1.4 +++ /project/oct/cvsroot/oct/timing2.lisp 2007/11/05 16:00:53 1.4.2.1 @@ -117,140 +117,143 @@ Test Time qd oct ---- ----------- -add 0.023 0.09 -mul 0.075 0.13 -div 0.299 0.29 -sqrt 0.105 0.11 -sin 0.115 0.14 -log 0.194 0.12 - -Times are in sec for the test. The default number of iterations were -used. Most of the timings match my expectations, including the log -test. Oct uses a different algorithm (Halley's method) which is -faster (in Lisp) than the algorithm used in qd (Newtwon iteration). +add 0.23 1.16 +mul 0.749 1.54 +div 3.00 3.11 +sqrt 10.57 12.2 +sin 57.33 64.5 +log 194 119 + +Times are in microsec/operation for the test. The default number of +iterations were used. Most of the timings match my expectations, +including the log test. Oct uses a different algorithm (Halley's +method) which is faster (in Lisp) than the algorithm used in qd +(Newtwon iteration). +Not also that these times include the 3-arg versions of the routines. ------------------------------------------------------------------------------- The raw data: The output from qd_timer -qd -v: +Timing qd_real +-------------- + Timing addition... -n = 100000 t = 0.0231462 -b = 1.428571e+04 -100000 operations in 0.0231462 s. - 0.231462 us +n = 1000000 t = 0.236154 +b = 142857.142857 +1000000 operations in 0.236154 s. + 0.236154 us Timing multiplication ... -n = 100000 t = 0.0749929 -b = 2.718268e+00 -100000 operations in 0.0749929 s. - 0.749929 us +n = 1000000 t = 0.748933 +b = 2.718280 +1000000 operations in 0.748933 s. + 0.748933 us Timing division ... -n = 100000 t = 0.298858 -b = 0.367881 -100000 operations in 0.298858 s. - 2.988580 us +n = 1000000 t = 3.004328 +b = 0.367880 +1000000 operations in 3.004328 s. + 3.004328 us Timing square root ... -n = 10000 t = 0.105049 +n = 100000 t = 1.057170 a = 2.821980 -10000 operations in 0.105049 s. - 10.504860 us +100000 operations in 1.057170 s. + 10.571696 us Timing sin ... -n = 2000 t = 0.114943 +n = 20000 t = 1.146667 a = 3.141593 -2000 operations in 0.114943 s. - 57.471350 us +20000 operations in 1.146667 s. + 57.333335 us Timing log ... -n = 1000 t = 0.193698 +n = 10000 t = 1.939869 a = -50.100000 -1000 operations in 0.193698 s. -193.697800 us -The output from CMUCL: +10000 operations in 1.939869 s. +193.986900 us + +-------------------------------------------------- +CMUCL results: -QD> (time-add) +CL-USER> (oct::time-add 1000000) ; Evaluation took: -; 0.09 seconds of real time -; 0.1 seconds of user run time -; 0.0 seconds of system run time -; 147,285,856 CPU cycles +; 1.16 seconds of real time +; 0.98 seconds of user run time +; 0.18 seconds of system run time +; 1,845,637,904 CPU cycles ; 0 page faults and -; 7,200,016 bytes consed. +; 72,000,248 bytes consed. ; -n = 100000 -b = #q14285.7142857142857142857142857142857142857142857142857142857142855q0 -NIL -QD> (time-mul) +n = 1000000 +b = #q142857.142857142857142857142857142857142857142857142857142857142854q0 + +CL-USER> (oct::time-mul 1000000) ; Evaluation took: -; 0.13 seconds of real time -; 0.1 seconds of user run time -; 0.02 seconds of system run time -; 203,790,588 CPU cycles +; 1.53 seconds of real time +; 1.27 seconds of user run time +; 0.25 seconds of system run time +; 2,430,859,732 CPU cycles ; 0 page faults and -; 7,200,824 bytes consed. +; 72,000,248 bytes consed. ; -n = 100000 -b = #q2.71826823717448966803506482442604644797444693267782286300915989397q0 -NIL -QD> (time-div) +n = 1000000 +b = #q2.71828046931937688381979970845435639275164502668250771294016782123q0 + +CL-USER> (oct::time-div 1000000) ; Evaluation took: -; 0.29 seconds of real time -; 0.28 seconds of user run time -; 0.01 seconds of system run time -; 460,956,912 CPU cycles +; 3.11 seconds of real time +; 2.94 seconds of user run time +; 0.16 seconds of system run time +; 4,957,512,968 CPU cycles ; 0 page faults and -; 7,200,016 bytes consed. +; 72,000,248 bytes consed. ; -n = 100000 -b = #q0.36788128056098406210328658773118942247132502490133718973918140856q0 -NIL -QD> (time-sqrt 10000) +n = 1000000 +b = #q0.367879625111086265804761271038216553876450599098470428879260437304q0 +CL-USER> (oct::time-sqrt 100000) ; Evaluation took: -; 0.11 seconds of real time -; 0.1 seconds of user run time -; 0.0 seconds of system run time -; 173,209,708 CPU cycles +; 1.22 seconds of real time +; 1.1 seconds of user run time +; 0.1 seconds of system run time +; 1,938,798,996 CPU cycles ; 0 page faults and -; 2,402,560 bytes consed. +; 24,000,128 bytes consed. ; -n = 10000 +n = 100000 a = #q2.82198033014704783016853125515542796898998765943212617578596649019q0 -NIL -QD> (time-sin) + +CL-USER> (oct::time-sin 20000) ; Evaluation took: -; 0.14 seconds of real time -; 0.14 seconds of user run time -; 0.0 seconds of system run time -; 213,378,476 CPU cycles +; 1.29 seconds of real time +; 1.24 seconds of user run time +; 0.05 seconds of system run time +; 2,053,157,408 CPU cycles ; 0 page faults and -; 3,105,800 bytes consed. +; 27,751,144 bytes consed. ; -n = 2000 -a = #q3.14159265358979323846264338327950288419716939937510582097494459409q0 -NIL -QD> (time-log) +n = 20000 +a = #q3.14159265358979323846264338327950288419716939937510582097494458294q0 + +CL-USER> (oct::time-log 10000) ; Evaluation took: -; 0.12 seconds of real time -; 0.12 seconds of user run time -; 0.01 seconds of system run time -; 192,187,304 CPU cycles +; 1.19 seconds of real time +; 1.13 seconds of user run time +; 0.04 seconds of system run time +; 1,890,677,952 CPU cycles ; 0 page faults and -; 1,621,792 bytes consed. +; 16,197,536 bytes consed. ; -n = 1000 -a = #q-50.100000000000000000000000000000000000000000000000000000000208796q0 -NIL -QD> +n = 10000 +a = #q-50.100000000000000000000000000000000000000000000000000000552824575q0 ---------------------------------------------- ||# From rtoy at common-lisp.net Wed Nov 7 03:08:29 2007 From: rtoy at common-lisp.net (rtoy) Date: Tue, 6 Nov 2007 22:08:29 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-rep.lisp qd.lisp Message-ID: <20071107030829.918B53F011@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv7195 Modified Files: Tag: THREE-ARG-BRANCH qd-rep.lisp qd.lisp Log Message: o Add 3-arg forms for add-qd-d, mul-qd-d, add-d-qd, sub-qd-d, sub-d-qd, and neg-qd. o Correct the compiler macros for CMUCL for sub-qd and sqr-qd. --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/04 03:00:56 1.10.2.4 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/07 03:08:26 1.10.2.5 @@ -247,7 +247,9 @@ `(,',qd-t ,a ,b ,c)))) (frob add-qd add-qd-t) (frob mul-qd mul-qd-t) - (frob div-qd div-qd-t)) + (frob div-qd div-qd-t) + (frob add-qd-d add-qd-d-t) + (frob mul-qd-d mul-qd-d-t)) #+cmu (define-compiler-macro sub-qd (a b &optional c) @@ -256,7 +258,7 @@ `(add-qd-t ,a (neg-qd ,b) nil))) #-cmu -(define-compiler-macro sub-qd (a b &optional c) +(define-compiler-macro sub-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) `(add-qd-t ,a (neg-qd ,b) ,c)) #+cmu @@ -266,6 +268,46 @@ `(sqr-qd-t ,a nil))) #-cmu -(define-compiler-macro sqr-qd (a b &optional c) +(define-compiler-macro sqr-qd (a &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) `(sqr-qd-t ,a ,c)) +#+cmu +(define-compiler-macro add-d-qd (a b &optional c) + (if c + `(setf ,c (add-qd-d ,b ,a)) + `(add-qd-d ,b ,a))) + +#-cmu +(define-compiler-macro add-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-d ,b ,a ,c)) + +#+cmu +(define-compiler-macro sub-qd-d (a b &optional c) + (if c + `(setf ,c (add-qd-d ,a (cl:- ,b))) + `(add-qd-d ,a (cl:- ,b)))) + +#-cmu +(define-compiler-macro sub-qd-d (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-d ,a (cl:- ,b) ,c)) + +#+cmu +(define-compiler-macro sub-d-qd (a b &optional c) + (if c + `(setf ,c (add-d-qd ,a (neg-qd ,b))) + `(add-d-qd ,a (neg-qd ,b)))) + +#-cmu +(define-compiler-macro sub-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-d-qd a (neg-qd ,b) ,c)) + +#+cmu +(define-compiler-macro neg-qd (a &optional c) + (if c + `(setf ,c (neg-qd-t ,a nil)) + `(neg-qd-t ,a nil))) + +#-cmu +(define-compiler-macro neg-qd (a &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(neg-qd-t ,a ,c)) + --- /project/oct/cvsroot/oct/qd.lisp 2007/11/05 16:00:39 1.60.2.5 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/07 03:08:26 1.60.2.6 @@ -132,6 +132,7 @@ add-d-qd add-dd-qd neg-qd + neg-qd-t sub-qd sub-qd-dd sub-qd-d @@ -159,10 +160,12 @@ renorm-4 renorm-5 add-qd-d + add-qd-d-t add-qd-dd add-qd add-qd-t mul-qd-d + mul-qd-d-t mul-qd-dd mul-qd mul-qd-t @@ -300,13 +303,17 @@ ;;;; Addition ;; Quad-double + double -(defun add-qd-d (a b) +(defun add-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (add-qd-d-t a b target)) + +(defun add-qd-d-t (a b target) "Add a quad-double A and a double-float B" (declare (type %quad-double a) (double-float b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu (ignore target)) (let* ((c0 0d0) (e c0) (c1 c0) @@ -316,21 +323,22 @@ (two-sum c0 e (qd-0 a) b) (when (float-infinity-p c0) - (return-from add-qd-d (%make-qd-d c0 0d0 0d0 0d0))) + (return-from add-qd-d-t (%store-qd-d target c0 0d0 0d0 0d0))) (two-sum c1 e (qd-1 a) e) (two-sum c2 e (qd-2 a) e) (two-sum c3 e (qd-3 a) e) (multiple-value-bind (r0 r1 r2 r3) (renorm-5 c0 c1 c2 c3 e) (if (and (zerop (qd-0 a)) (zerop b)) - (%make-qd-d c0 0d0 0d0 0d0) - (%make-qd-d r0 r1 r2 r3))))) + (%store-qd-d target c0 0d0 0d0 0d0) + (%store-qd-d target r0 r1 r2 r3))))) -(defun add-d-qd (a b) +(defun add-d-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (double-float a) (type %quad-double b) - (optimize (speed 3))) - (add-qd-d b a)) + (optimize (speed 3)) + #+cmu (ignore target)) + (add-qd-d b a #-cmu target)) #+cmu (defun add-qd-dd (a b) @@ -461,12 +469,16 @@ ;; directly. For CMU, we always replace the parameter C with NIL ;; because we don't use it. For other Lisps, we create the necessary ;; object and call add-qd-t. -(defun neg-qd (a) - (declare (type %quad-double a)) +(defun neg-qd (a &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (neg-qd-t a target)) + +(defun neg-qd-t (a target) + (declare (type %quad-double a) + #+cmu (ignore target)) (with-qd-parts (a0 a1 a2 a3) a (declare (double-float a0 a1 a2 a3)) - (%make-qd-d (cl:- a0) (cl:- a1) (cl:- a2) (cl:- a3)))) + (%store-qd-d target (cl:- a0) (cl:- a1) (cl:- a2) (cl:- a3)))) (defun sub-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a b)) @@ -478,16 +490,18 @@ (type double-double-float b)) (add-qd-dd a (cl:- b))) -(defun sub-qd-d (a b) +(defun sub-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a) - (type double-float b)) - (add-qd-d a (cl:- b))) + (type double-float b) + #+cmu (ignore target)) + (add-qd-d a (cl:- b) #-cmu target)) -(defun sub-d-qd (a b) +(defun sub-d-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type double-float a) - (type %quad-double b)) + (type %quad-double b) + #+cmu (ignore target)) ;; a - b = a + (-b) - (add-d-qd a (neg-qd b))) + (add-d-qd a (neg-qd b) #-cmu target)) ;; Works @@ -497,18 +511,22 @@ ;; Clisp says ;; 14.142135623730950488016887242096980785696718753769480731766797379908L0 ;; -(defun mul-qd-d (a b) +(defun mul-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (mul-qd-d-t a b target)) + +(defun mul-qd-d-t (a b target) "Multiply quad-double A with B" (declare (type %quad-double a) (double-float b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu (ignore target)) (multiple-value-bind (p0 q0) (two-prod (qd-0 a) b) (when (float-infinity-p p0) - (return-from mul-qd-d (%make-qd-d p0 0d0 0d0 0d0))) + (return-from mul-qd-d-t (%store-qd-d target p0 0d0 0d0 0d0))) (multiple-value-bind (p1 q1) (two-prod (qd-1 a) b) (declare (double-float p1 q1)) @@ -528,8 +546,8 @@ (multiple-value-bind (s0 s1 s2 s3) (renorm-5 s0 s1 s2 s3 s4) (if (zerop s0) - (%make-qd-d (float-sign p0 0d0) 0d0 0d0 0d0) - (%make-qd-d s0 s1 s2 s3))))))))) + (%store-qd-d target (float-sign p0 0d0) 0d0 0d0 0d0) + (%store-qd-d target s0 s1 s2 s3))))))))) ;; a0 * b0 0 ;; a0 * b1 1 From rtoy at common-lisp.net Wed Nov 7 03:45:41 2007 From: rtoy at common-lisp.net (rtoy) Date: Tue, 6 Nov 2007 22:45:41 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-fun.lisp qd-rep.lisp qd.lisp Message-ID: <20071107034541.653AB67045@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv16164 Modified Files: Tag: THREE-ARG-BRANCH qd-fun.lisp qd-rep.lisp qd.lisp Log Message: qd-rep.lisp: o Fix typo in compiler macro for sub-d-qd qd.lisp: o Use 3-arg versions in div-qd-t to speed things up. Approximately doubles the speed with clisp. qd-fun.lisp: o Use 3-arg versions in sqrt-qd to speed things up. Approximately doubles the speed with clisp. --- /project/oct/cvsroot/oct/qd-fun.lisp 2007/10/18 14:38:56 1.90 +++ /project/oct/cvsroot/oct/qd-fun.lisp 2007/11/07 03:45:41 1.90.2.1 @@ -47,6 +47,7 @@ #+cmu (declaim (ext:maybe-inline sqrt-qd)) +#+nil (defun sqrt-qd (a) "Square root of the (non-negative) quad-float" (declare (type %quad-double a) @@ -79,6 +80,47 @@ (setf r (add-qd r (mul-qd r (sub-d-qd half (mul-qd h (sqr-qd r))))))) (scale-float-qd (mul-qd r new-a) (ash k -1))))) +(defun sqrt-qd (a) + "Square root of the (non-negative) quad-float" + (declare (type %quad-double a) + (optimize (speed 3) (space 0))) + ;; Perform the following Newton iteration: + ;; + ;; x' = x + (1 - a * x^2) * x / 2 + ;; + ;; which converges to 1/sqrt(a). + ;; + ;; However, there appear to be round-off errors when x is either + ;; very large or very small. So let x = f*2^(2*k). Then sqrt(x) = + ;; 2^k*sqrt(f), and sqrt(f) doesn't have round-off problems. + (when (zerop-qd a) + (return-from sqrt-qd a)) + (when (float-infinity-p (qd-0 a)) + (return-from sqrt-qd a)) + + (let* ((k (logandc2 (logb-finite (qd-0 a)) 1)) + (new-a (scale-float-qd a (- k)))) + (assert (evenp k)) + (let* ((r (make-qd-d (cl:/ (sqrt (the (double-float (0d0)) + (qd-0 new-a)))))) + (temp (%make-qd-d 0d0 0d0 0d0 0d0)) + (half 0.5d0) + (h (mul-qd-d new-a half))) + (declare (type %quad-double r)) + ;; Since we start with double-float precision, three more + ;; iterations should give us full accuracy. + (dotimes (k 3) + #+nil + (setf r (add-qd r (mul-qd r (sub-d-qd half (mul-qd h (sqr-qd r)))))) + (sqr-qd r temp) + (mul-qd h temp temp) + (sub-d-qd half temp temp) + (mul-qd r temp temp) + (add-qd r temp r) + ) + (mul-qd r new-a r) + (scale-float-qd r (ash k -1))))) + (defun hypot-aux-qd (x y) (declare (type %quad-double x y)) (let ((k (- (logb-finite (max (cl:abs (qd-0 x)) --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/07 03:08:26 1.10.2.5 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/07 03:45:41 1.10.2.6 @@ -299,7 +299,7 @@ #-cmu (define-compiler-macro sub-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-d-qd a (neg-qd ,b) ,c)) + `(add-d-qd ,a (neg-qd ,b) ,c)) #+cmu (define-compiler-macro neg-qd (a &optional c) --- /project/oct/cvsroot/oct/qd.lisp 2007/11/07 03:08:26 1.60.2.6 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/07 03:45:41 1.60.2.7 @@ -863,6 +863,7 @@ (defun div-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (div-qd-t a b target)) +#+nil (defun div-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) @@ -886,6 +887,33 @@ (renorm-4 q0 q1 q2 q3) (%store-qd-d target q0 q1 q2 q3))))))) +(defun div-qd-t (a b target) + (declare (type %quad-double a b) + (optimize (speed 3) + (space 0)) + (inline float-infinity-p) + #+cmu + (ignore target)) + (let ((a0 (qd-0 a)) + (b0 (qd-0 b))) + (let* ((q0 (cl:/ a0 b0)) + (r (%make-qd-d 0d0 0d0 0d0 0d0))) + (mul-qd-d b q0 r) + (sub-qd a r r) + (let* ((q1 (cl:/ (qd-0 r) b0)) + (temp (mul-qd-d b q1))) + (when (float-infinity-p q0) + (return-from div-qd-t (%store-qd-d target q0 0d0 0d0 0d0))) + + (sub-qd r temp r) + (let ((q2 (cl:/ (qd-0 r) b0))) + (mul-qd-d b q2 temp) + (sub-qd r temp r) + (let ((q3 (cl:/ (qd-0 r) b0))) + (multiple-value-bind (q0 q1 q2 q3) + (renorm-4 q0 q1 q2 q3) + (%store-qd-d target q0 q1 q2 q3)))))))) + (declaim (inline invert-qd)) (defun invert-qd (v) From rtoy at common-lisp.net Wed Nov 7 21:38:10 2007 From: rtoy at common-lisp.net (rtoy) Date: Wed, 7 Nov 2007 16:38:10 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-extra.lisp qd-fun.lisp qd-rep.lisp qd.lisp timing2.lisp Message-ID: <20071107213810.65F5B46110@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv7652 Modified Files: qd-extra.lisp qd-fun.lisp qd-rep.lisp qd.lisp timing2.lisp Log Message: Merge the changes from the THREE-ARG-BRANCH to HEAD. --- /project/oct/cvsroot/oct/qd-extra.lisp 2007/10/26 15:48:15 1.5 +++ /project/oct/cvsroot/oct/qd-extra.lisp 2007/11/07 21:38:10 1.6 @@ -1099,14 +1099,14 @@ ;; Time Sparc PPC x86 PPC (fma) Sparc2 ;; exp-qd/reduce 2.06 3.18 10.46 2.76 6.12 ;; expm1-qd/series 8.81 12.24 18.87 3.26 29.0 -;; expm1-qd/dup 5.68 4.34 18.47 3.64 18.78 -;; exp-qd/pade 1.53 +;; expm1-qd/dup 5.68 4.34 18.47 3.64 9.77 +;; exp-qd/pade 1.53 4.51 ;; ;; Consing (MB) Sparc ;; exp-qd/reduce 45 45 638 44.4 45 ;; expm1-qd/series 519 519 1201 14.8 519 ;; expm1-qd/dup 32 32 1224 32.0 32 -;; exp-qd/pade 44 +;; exp-qd/pade 44 44 ;; ;; Speeds seem to vary quite a bit between architectures. ;; --- /project/oct/cvsroot/oct/qd-fun.lisp 2007/10/18 14:38:56 1.90 +++ /project/oct/cvsroot/oct/qd-fun.lisp 2007/11/07 21:38:10 1.91 @@ -47,6 +47,7 @@ #+cmu (declaim (ext:maybe-inline sqrt-qd)) +#+nil (defun sqrt-qd (a) "Square root of the (non-negative) quad-float" (declare (type %quad-double a) @@ -79,6 +80,47 @@ (setf r (add-qd r (mul-qd r (sub-d-qd half (mul-qd h (sqr-qd r))))))) (scale-float-qd (mul-qd r new-a) (ash k -1))))) +(defun sqrt-qd (a) + "Square root of the (non-negative) quad-float" + (declare (type %quad-double a) + (optimize (speed 3) (space 0))) + ;; Perform the following Newton iteration: + ;; + ;; x' = x + (1 - a * x^2) * x / 2 + ;; + ;; which converges to 1/sqrt(a). + ;; + ;; However, there appear to be round-off errors when x is either + ;; very large or very small. So let x = f*2^(2*k). Then sqrt(x) = + ;; 2^k*sqrt(f), and sqrt(f) doesn't have round-off problems. + (when (zerop-qd a) + (return-from sqrt-qd a)) + (when (float-infinity-p (qd-0 a)) + (return-from sqrt-qd a)) + + (let* ((k (logandc2 (logb-finite (qd-0 a)) 1)) + (new-a (scale-float-qd a (- k)))) + (assert (evenp k)) + (let* ((r (make-qd-d (cl:/ (sqrt (the (double-float (0d0)) + (qd-0 new-a)))))) + (temp (%make-qd-d 0d0 0d0 0d0 0d0)) + (half 0.5d0) + (h (mul-qd-d new-a half))) + (declare (type %quad-double r)) + ;; Since we start with double-float precision, three more + ;; iterations should give us full accuracy. + (dotimes (k 3) + #+nil + (setf r (add-qd r (mul-qd r (sub-d-qd half (mul-qd h (sqr-qd r)))))) + (sqr-qd r temp) + (mul-qd h temp temp) + (sub-d-qd half temp temp) + (mul-qd r temp temp) + (add-qd r temp r) + ) + (mul-qd r new-a r) + (scale-float-qd r (ash k -1))))) + (defun hypot-aux-qd (x y) (declare (type %quad-double x y)) (let ((k (- (logb-finite (max (cl:abs (qd-0 x)) --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/10/16 17:09:46 1.10 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/07 21:38:10 1.11 @@ -81,6 +81,9 @@ (kernel:%make-double-double-float a2 a3))) ) +(defmacro %store-qd-d (target q0 q1 q2 q3) + (declare (ignore target)) + `(%make-qd-d ,q0 ,q1, q2, q3)) (defun qd-parts (qd) "Extract the four doubles comprising a quad-double and return them @@ -169,6 +172,15 @@ (setf (aref ,a 3) ,a3) ,a))) +(defmacro %store-qd-d (target q0 q1 q2 q3) + (let ((dest (gensym "TARGET-"))) + `(let ((,dest ,target)) + (setf (aref ,dest 0) ,q0) + (setf (aref ,dest 1) ,q1) + (setf (aref ,dest 2) ,q2) + (setf (aref ,dest 3) ,q3) + ,dest))) + (defun qd-parts (qd) "Extract the four doubles comprising a quad-double and return them as multiple values. The most significant double is the first value." @@ -221,3 +233,81 @@ (declare (ignore x)) nil) ) ; end progn + + +(macrolet + ((frob (qd qd-t) + #+cmu + `(define-compiler-macro ,qd (a b &optional c) + (if c + `(setf ,c (,',qd-t ,a ,b nil)) + `(,',qd-t ,a ,b nil))) + #-cmu + `(define-compiler-macro ,qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) + `(,',qd-t ,a ,b ,c)))) + (frob add-qd add-qd-t) + (frob mul-qd mul-qd-t) + (frob div-qd div-qd-t) + (frob add-qd-d add-qd-d-t) + (frob mul-qd-d mul-qd-d-t)) + +#+cmu +(define-compiler-macro sub-qd (a b &optional c) + (if c + `(setf ,c (add-qd-t ,a (neg-qd ,b) nil)) + `(add-qd-t ,a (neg-qd ,b) nil))) + +#-cmu +(define-compiler-macro sub-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-t ,a (neg-qd ,b) ,c)) + +#+cmu +(define-compiler-macro sqr-qd (a &optional c) + (if c + `(setf ,c (sqr-qd-t ,a nil)) + `(sqr-qd-t ,a nil))) + +#-cmu +(define-compiler-macro sqr-qd (a &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(sqr-qd-t ,a ,c)) + +#+cmu +(define-compiler-macro add-d-qd (a b &optional c) + (if c + `(setf ,c (add-qd-d ,b ,a)) + `(add-qd-d ,b ,a))) + +#-cmu +(define-compiler-macro add-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-d ,b ,a ,c)) + +#+cmu +(define-compiler-macro sub-qd-d (a b &optional c) + (if c + `(setf ,c (add-qd-d ,a (cl:- ,b))) + `(add-qd-d ,a (cl:- ,b)))) + +#-cmu +(define-compiler-macro sub-qd-d (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-qd-d ,a (cl:- ,b) ,c)) + +#+cmu +(define-compiler-macro sub-d-qd (a b &optional c) + (if c + `(setf ,c (add-d-qd ,a (neg-qd ,b))) + `(add-d-qd ,a (neg-qd ,b)))) + +#-cmu +(define-compiler-macro sub-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(add-d-qd ,a (neg-qd ,b) ,c)) + +#+cmu +(define-compiler-macro neg-qd (a &optional c) + (if c + `(setf ,c (neg-qd-t ,a nil)) + `(neg-qd-t ,a nil))) + +#-cmu +(define-compiler-macro neg-qd (a &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + `(neg-qd-t ,a ,c)) + --- /project/oct/cvsroot/oct/qd.lisp 2007/10/18 14:38:11 1.60 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/07 21:38:10 1.61 @@ -132,6 +132,7 @@ add-d-qd add-dd-qd neg-qd + neg-qd-t sub-qd sub-qd-dd sub-qd-d @@ -159,13 +160,19 @@ renorm-4 renorm-5 add-qd-d + add-qd-d-t add-qd-dd add-qd + add-qd-t mul-qd-d + mul-qd-d-t mul-qd-dd mul-qd + mul-qd-t sqr-qd + sqr-qd-t div-qd + div-qd-t div-qd-d div-qd-dd)) @@ -179,12 +186,15 @@ make-qd-d add-qd-d add-d-qd add-qd-dd add-dd-qd - add-qd + add-qd add-qd-t neg-qd sub-qd sub-qd-dd sub-qd-d sub-d-qd - mul-qd-d mul-qd-dd mul-qd + mul-qd-d mul-qd-dd + mul-qd + mul-qd-t sqr-qd div-qd div-qd-d div-qd-dd + div-qd-t make-qd-dd )) @@ -293,13 +303,17 @@ ;;;; Addition ;; Quad-double + double -(defun add-qd-d (a b) +(defun add-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (add-qd-d-t a b target)) + +(defun add-qd-d-t (a b target) "Add a quad-double A and a double-float B" (declare (type %quad-double a) (double-float b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu (ignore target)) (let* ((c0 0d0) (e c0) (c1 c0) @@ -309,21 +323,22 @@ (two-sum c0 e (qd-0 a) b) (when (float-infinity-p c0) - (return-from add-qd-d (%make-qd-d c0 0d0 0d0 0d0))) + (return-from add-qd-d-t (%store-qd-d target c0 0d0 0d0 0d0))) (two-sum c1 e (qd-1 a) e) (two-sum c2 e (qd-2 a) e) (two-sum c3 e (qd-3 a) e) (multiple-value-bind (r0 r1 r2 r3) (renorm-5 c0 c1 c2 c3 e) (if (and (zerop (qd-0 a)) (zerop b)) - (%make-qd-d c0 0d0 0d0 0d0) - (%make-qd-d r0 r1 r2 r3))))) + (%store-qd-d target c0 0d0 0d0 0d0) + (%store-qd-d target r0 r1 r2 r3))))) -(defun add-d-qd (a b) +(defun add-d-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (double-float a) (type %quad-double b) - (optimize (speed 3))) - (add-qd-d b a)) + (optimize (speed 3)) + #+cmu (ignore target)) + (add-qd-d b a #-cmu target)) #+cmu (defun add-qd-dd (a b) @@ -385,10 +400,16 @@ ;; which don't do a very good job with dataflow. CMUCL is one of ;; those compilers. -(defun add-qd (a b) +(defun add-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (add-qd-t a b target)) + + +(defun add-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) - (space 0))) + (space 0)) + #+cmu + (ignore target)) ;; This is the version that is NOT IEEE. Should we use the IEEE ;; version? It's quite a bit more complicated. ;; @@ -407,7 +428,7 @@ (inline float-infinity-p)) (when (float-infinity-p s0) - (return-from add-qd (%make-qd-d s0 0d0 0d0 0d0))) + (return-from add-qd-t (%store-qd-d target s0 0d0 0d0 0d0))) (let ((v0 (cl:- s0 a0)) (v1 (cl:- s1 a1)) (v2 (cl:- s2 a2)) @@ -441,19 +462,27 @@ (multiple-value-setq (s0 s1 s2 s3) (renorm-5 s0 s1 s2 s3 t0)) (if (and (zerop a0) (zerop b0)) - (%make-qd-d (+ a0 b0) 0d0 0d0 0d0) - (%make-qd-d s0 s1 s2 s3))))))))))) + (%store-qd-d target (+ a0 b0) 0d0 0d0 0d0) + (%store-qd-d target s0 s1 s2 s3))))))))))) -(defun neg-qd (a) - (declare (type %quad-double a)) +;; Define some compiler macros to transform add-qd to add-qd-t +;; directly. For CMU, we always replace the parameter C with NIL +;; because we don't use it. For other Lisps, we create the necessary +;; object and call add-qd-t. +(defun neg-qd (a &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (neg-qd-t a target)) + +(defun neg-qd-t (a target) + (declare (type %quad-double a) + #+cmu (ignore target)) (with-qd-parts (a0 a1 a2 a3) a (declare (double-float a0 a1 a2 a3)) - (%make-qd-d (cl:- a0) (cl:- a1) (cl:- a2) (cl:- a3)))) + (%store-qd-d target (cl:- a0) (cl:- a1) (cl:- a2) (cl:- a3)))) -(defun sub-qd (a b) +(defun sub-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a b)) - (add-qd a (neg-qd b))) + (add-qd-t a (neg-qd b) target)) #+cmu (defun sub-qd-dd (a b) @@ -461,16 +490,18 @@ (type double-double-float b)) (add-qd-dd a (cl:- b))) -(defun sub-qd-d (a b) +(defun sub-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a) - (type double-float b)) - (add-qd-d a (cl:- b))) + (type double-float b) + #+cmu (ignore target)) + (add-qd-d a (cl:- b) #-cmu target)) -(defun sub-d-qd (a b) +(defun sub-d-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type double-float a) - (type %quad-double b)) + (type %quad-double b) + #+cmu (ignore target)) ;; a - b = a + (-b) - (add-d-qd a (neg-qd b))) + (add-d-qd a (neg-qd b) #-cmu target)) ;; Works @@ -480,18 +511,22 @@ ;; Clisp says ;; 14.142135623730950488016887242096980785696718753769480731766797379908L0 ;; -(defun mul-qd-d (a b) +(defun mul-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (mul-qd-d-t a b target)) + +(defun mul-qd-d-t (a b target) "Multiply quad-double A with B" (declare (type %quad-double a) (double-float b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu (ignore target)) (multiple-value-bind (p0 q0) (two-prod (qd-0 a) b) (when (float-infinity-p p0) - (return-from mul-qd-d (%make-qd-d p0 0d0 0d0 0d0))) + (return-from mul-qd-d-t (%store-qd-d target p0 0d0 0d0 0d0))) (multiple-value-bind (p1 q1) (two-prod (qd-1 a) b) (declare (double-float p1 q1)) @@ -511,8 +546,8 @@ (multiple-value-bind (s0 s1 s2 s3) (renorm-5 s0 s1 s2 s3 s4) (if (zerop s0) - (%make-qd-d (float-sign p0 0d0) 0d0 0d0 0d0) - (%make-qd-d s0 s1 s2 s3))))))))) + (%store-qd-d target (float-sign p0 0d0) 0d0 0d0 0d0) + (%store-qd-d target s0 s1 s2 s3))))))))) ;; a0 * b0 0 ;; a0 * b1 1 @@ -602,11 +637,17 @@ ;; ;; Clisp says ;; 14.142135623730950488016887242096980785696718753769480731766797379908L0 -(defun mul-qd (a b) + +(defun mul-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (mul-qd-t a b target)) + +(defun mul-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu + (ignore target)) (with-qd-parts (a0 a1 a2 a3) a (declare (double-float a0 a1 a2 a3)) @@ -617,7 +658,7 @@ (two-prod a0 b0) #+cmu (when (float-infinity-p p0) - (return-from mul-qd (%make-qd-d p0 0d0 0d0 0d0))) + (return-from mul-qd-t (%store-qd-d target p0 0d0 0d0 0d0))) (multiple-value-bind (p1 q1) (two-prod a0 b1) (multiple-value-bind (p2 q2) @@ -662,8 +703,9 @@ (multiple-value-bind (r0 r1 s0 s1) (renorm-5 p0 p1 s0 s1 s2) (if (zerop r0) - (%make-qd-d p0 0d0 0d0 0d0) - (%make-qd-d r0 r1 s0 s1)))))))))))))) + (%store-qd-d target p0 0d0 0d0 0d0) + (%store-qd-d target r0 r1 s0 s1)))))))))))))) + ;; This is the non-sloppy version. I think this works just fine, but ;; since qd defaults to the sloppy multiplication version, we do the @@ -766,11 +808,16 @@ (multiple-value-call #'%make-qd-d (renorm-5 p0 p1 s0 t0 t1)))))))))))))))))))) -(defun sqr-qd (a) +(defun sqr-qd (a &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (sqr-qd-t a target)) + +(defun sqr-qd-t (a target) "Square A" (declare (type %quad-double a) (optimize (speed 3) - (space 0))) + (space 0)) + #+cmu + (ignore target)) (multiple-value-bind (p0 q0) (two-sqr (qd-0 a)) (multiple-value-bind (p1 q1) @@ -810,14 +857,20 @@ (multiple-value-bind (a0 a1 a2 a3) (renorm-5 p0 p1 p2 p3 p4) - (%make-qd-d a0 a1 a2 a3))))))))) + (%store-qd-d target a0 a1 a2 a3))))))))) -(defun div-qd (a b) +(defun div-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) + (div-qd-t a b target)) + +#+nil +(defun div-qd-t (a b target) (declare (type %quad-double a b) (optimize (speed 3) (space 0)) - (inline float-infinity-p)) + (inline float-infinity-p) + #+cmu + (ignore target)) (let ((a0 (qd-0 a)) (b0 (qd-0 b))) (let* ((q0 (cl:/ a0 b0)) @@ -825,18 +878,46 @@ (q1 (cl:/ (qd-0 r) b0))) (when (float-infinity-p q0) - (return-from div-qd (%make-qd-d q0 0d0 0d0 0d0))) + (return-from div-qd-t (%store-qd-d target q0 0d0 0d0 0d0))) (setf r (sub-qd r (mul-qd-d b q1))) (let ((q2 (cl:/ (qd-0 r) b0))) (setf r (sub-qd r (mul-qd-d b q2))) (let ((q3 (cl:/ (qd-0 r) b0))) (multiple-value-bind (q0 q1 q2 q3) (renorm-4 q0 q1 q2 q3) - (%make-qd-d q0 q1 q2 q3))))))) + (%store-qd-d target q0 q1 q2 q3))))))) + +(defun div-qd-t (a b target) + (declare (type %quad-double a b) + (optimize (speed 3) + (space 0)) + (inline float-infinity-p) + #+cmu + (ignore target)) + (let ((a0 (qd-0 a)) + (b0 (qd-0 b))) + (let* ((q0 (cl:/ a0 b0)) + (r (%make-qd-d 0d0 0d0 0d0 0d0))) + (mul-qd-d b q0 r) + (sub-qd a r r) + (let* ((q1 (cl:/ (qd-0 r) b0)) + (temp (mul-qd-d b q1))) + (when (float-infinity-p q0) + (return-from div-qd-t (%store-qd-d target q0 0d0 0d0 0d0))) + + (sub-qd r temp r) + (let ((q2 (cl:/ (qd-0 r) b0))) + (mul-qd-d b q2 temp) + (sub-qd r temp r) + (let ((q3 (cl:/ (qd-0 r) b0))) + (multiple-value-bind (q0 q1 q2 q3) + (renorm-4 q0 q1 q2 q3) + (%store-qd-d target q0 q1 q2 q3)))))))) (declaim (inline invert-qd)) -(defun invert-qd(v) ;; a quartic newton iteration for 1/v +(defun invert-qd (v) + ;; a quartic newton iteration for 1/v ;; to invert v, start with a good guess, x. ;; let h= 1-v*x ;; h is small ;; return x+ x*(h+h^2+h^3) . compute h3 in double-float --- /project/oct/cvsroot/oct/timing2.lisp 2007/10/16 14:21:13 1.4 +++ /project/oct/cvsroot/oct/timing2.lisp 2007/11/07 21:38:10 1.5 @@ -117,140 +117,143 @@ Test Time qd oct ---- ----------- -add 0.023 0.09 -mul 0.075 0.13 -div 0.299 0.29 -sqrt 0.105 0.11 -sin 0.115 0.14 -log 0.194 0.12 - -Times are in sec for the test. The default number of iterations were -used. Most of the timings match my expectations, including the log -test. Oct uses a different algorithm (Halley's method) which is -faster (in Lisp) than the algorithm used in qd (Newtwon iteration). +add 0.23 1.16 +mul 0.749 1.54 +div 3.00 3.11 +sqrt 10.57 12.2 +sin 57.33 64.5 +log 194 119 + +Times are in microsec/operation for the test. The default number of +iterations were used. Most of the timings match my expectations, +including the log test. Oct uses a different algorithm (Halley's +method) which is faster (in Lisp) than the algorithm used in qd +(Newtwon iteration). +Not also that these times include the 3-arg versions of the routines. ------------------------------------------------------------------------------- The raw data: The output from qd_timer -qd -v: +Timing qd_real +-------------- + Timing addition... -n = 100000 t = 0.0231462 -b = 1.428571e+04 -100000 operations in 0.0231462 s. - 0.231462 us +n = 1000000 t = 0.236154 +b = 142857.142857 +1000000 operations in 0.236154 s. + 0.236154 us Timing multiplication ... -n = 100000 t = 0.0749929 -b = 2.718268e+00 -100000 operations in 0.0749929 s. - 0.749929 us +n = 1000000 t = 0.748933 +b = 2.718280 +1000000 operations in 0.748933 s. + 0.748933 us Timing division ... -n = 100000 t = 0.298858 -b = 0.367881 -100000 operations in 0.298858 s. - 2.988580 us +n = 1000000 t = 3.004328 +b = 0.367880 +1000000 operations in 3.004328 s. + 3.004328 us Timing square root ... -n = 10000 t = 0.105049 +n = 100000 t = 1.057170 a = 2.821980 -10000 operations in 0.105049 s. - 10.504860 us +100000 operations in 1.057170 s. + 10.571696 us Timing sin ... -n = 2000 t = 0.114943 +n = 20000 t = 1.146667 a = 3.141593 -2000 operations in 0.114943 s. - 57.471350 us +20000 operations in 1.146667 s. + 57.333335 us Timing log ... -n = 1000 t = 0.193698 +n = 10000 t = 1.939869 a = -50.100000 -1000 operations in 0.193698 s. -193.697800 us -The output from CMUCL: +10000 operations in 1.939869 s. +193.986900 us + +-------------------------------------------------- +CMUCL results: -QD> (time-add) +CL-USER> (oct::time-add 1000000) ; Evaluation took: -; 0.09 seconds of real time -; 0.1 seconds of user run time -; 0.0 seconds of system run time -; 147,285,856 CPU cycles +; 1.16 seconds of real time +; 0.98 seconds of user run time +; 0.18 seconds of system run time +; 1,845,637,904 CPU cycles ; 0 page faults and -; 7,200,016 bytes consed. +; 72,000,248 bytes consed. ; -n = 100000 -b = #q14285.7142857142857142857142857142857142857142857142857142857142855q0 -NIL -QD> (time-mul) +n = 1000000 +b = #q142857.142857142857142857142857142857142857142857142857142857142854q0 + +CL-USER> (oct::time-mul 1000000) ; Evaluation took: -; 0.13 seconds of real time -; 0.1 seconds of user run time -; 0.02 seconds of system run time -; 203,790,588 CPU cycles +; 1.53 seconds of real time +; 1.27 seconds of user run time +; 0.25 seconds of system run time +; 2,430,859,732 CPU cycles ; 0 page faults and -; 7,200,824 bytes consed. +; 72,000,248 bytes consed. ; -n = 100000 -b = #q2.71826823717448966803506482442604644797444693267782286300915989397q0 -NIL -QD> (time-div) +n = 1000000 +b = #q2.71828046931937688381979970845435639275164502668250771294016782123q0 + +CL-USER> (oct::time-div 1000000) ; Evaluation took: -; 0.29 seconds of real time -; 0.28 seconds of user run time -; 0.01 seconds of system run time -; 460,956,912 CPU cycles +; 3.11 seconds of real time +; 2.94 seconds of user run time +; 0.16 seconds of system run time +; 4,957,512,968 CPU cycles ; 0 page faults and -; 7,200,016 bytes consed. +; 72,000,248 bytes consed. ; -n = 100000 -b = #q0.36788128056098406210328658773118942247132502490133718973918140856q0 -NIL -QD> (time-sqrt 10000) +n = 1000000 +b = #q0.367879625111086265804761271038216553876450599098470428879260437304q0 +CL-USER> (oct::time-sqrt 100000) ; Evaluation took: -; 0.11 seconds of real time -; 0.1 seconds of user run time -; 0.0 seconds of system run time -; 173,209,708 CPU cycles +; 1.22 seconds of real time +; 1.1 seconds of user run time +; 0.1 seconds of system run time +; 1,938,798,996 CPU cycles ; 0 page faults and -; 2,402,560 bytes consed. +; 24,000,128 bytes consed. ; -n = 10000 +n = 100000 a = #q2.82198033014704783016853125515542796898998765943212617578596649019q0 -NIL -QD> (time-sin) + +CL-USER> (oct::time-sin 20000) ; Evaluation took: -; 0.14 seconds of real time -; 0.14 seconds of user run time -; 0.0 seconds of system run time -; 213,378,476 CPU cycles +; 1.29 seconds of real time +; 1.24 seconds of user run time +; 0.05 seconds of system run time +; 2,053,157,408 CPU cycles ; 0 page faults and -; 3,105,800 bytes consed. +; 27,751,144 bytes consed. ; -n = 2000 -a = #q3.14159265358979323846264338327950288419716939937510582097494459409q0 -NIL -QD> (time-log) +n = 20000 +a = #q3.14159265358979323846264338327950288419716939937510582097494458294q0 + +CL-USER> (oct::time-log 10000) ; Evaluation took: -; 0.12 seconds of real time -; 0.12 seconds of user run time -; 0.01 seconds of system run time -; 192,187,304 CPU cycles +; 1.19 seconds of real time +; 1.13 seconds of user run time +; 0.04 seconds of system run time +; 1,890,677,952 CPU cycles ; 0 page faults and -; 1,621,792 bytes consed. +; 16,197,536 bytes consed. ; -n = 1000 -a = #q-50.100000000000000000000000000000000000000000000000000000000208796q0 -NIL -QD> +n = 10000 +a = #q-50.100000000000000000000000000000000000000000000000000000552824575q0 ---------------------------------------------- ||# From rtoy at common-lisp.net Sat Nov 10 21:29:54 2007 From: rtoy at common-lisp.net (rtoy) Date: Sat, 10 Nov 2007 16:29:54 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd.lisp Message-ID: <20071110212954.CC7A81F00A@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv13680 Modified Files: qd.lisp Log Message: Add ADD-QD-D-t, MUL-QD-D-T, and SQR-QD-T to declaim block. --- /project/oct/cvsroot/oct/qd.lisp 2007/11/07 21:38:10 1.61 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/10 21:29:54 1.62 @@ -187,12 +187,15 @@ add-qd-d add-d-qd add-qd-dd add-dd-qd add-qd add-qd-t + add-qd-d-t neg-qd sub-qd sub-qd-dd sub-qd-d sub-d-qd mul-qd-d mul-qd-dd mul-qd mul-qd-t + mul-qd-d-t sqr-qd + sqr-qd-t div-qd div-qd-d div-qd-dd div-qd-t make-qd-dd From rtoy at common-lisp.net Fri Nov 16 19:44:06 2007 From: rtoy at common-lisp.net (rtoy) Date: Fri, 16 Nov 2007 14:44:06 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-dd.lisp qd.lisp Message-ID: <20071116194406.42FD71135@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv29848 Modified Files: qd-dd.lisp qd.lisp Log Message: Don't compile with safety 0 on Allegro. I think this causes problems on x86 because it doesn't quite keep the precision right. --- /project/oct/cvsroot/oct/qd-dd.lisp 2007/10/15 18:53:44 1.11 +++ /project/oct/cvsroot/oct/qd-dd.lisp 2007/11/16 19:44:06 1.12 @@ -206,7 +206,7 @@ (defun two-prod (a b) "Compute fl(a*b) and err(a*b)" (declare (double-float a b) - (optimize (speed 3) (safety 0) (debug 0))) + (optimize (speed 3) (safety #-allegro 0 #+allegro 1) (debug 0))) (let ((p (* a b))) (declare (double-float p)) (multiple-value-bind (a-hi a-lo) @@ -227,7 +227,7 @@ "Compute fl(a*a) and err(a*b). This is a more efficient implementation of two-prod" (declare (double-float a) - (optimize (speed 3) (safety 0) (debug 0))) + (optimize (speed 3) (safety #-allegro 0 #+allegro 1) (debug 0))) (let ((q (* a a))) (declare (double-float q)) (multiple-value-bind (a-hi a-lo) --- /project/oct/cvsroot/oct/qd.lisp 2007/11/10 21:29:54 1.62 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/16 19:44:06 1.63 @@ -227,7 +227,7 @@ (defun renorm-4 (c0 c1 c2 c3) (declare (double-float c0 c1 c2 c3) - (optimize (speed 3) (safety 0) (debug 0))) + (optimize (speed 3) (safety #-allegro 0 #+allegro 1) (debug 0))) (let ((s0 0d0) (s1 0d0) (s2 0d0) @@ -252,7 +252,7 @@ (defun renorm-5 (c0 c1 c2 c3 c4) (declare (double-float c0 c1 c2 c3 c4) - (optimize (speed 3) (safety 0))) + (optimize (speed 3) (safety #-allegro 0 #+allegro 1))) (let ((s0 0d0) (s1 0d0) (s2 0d0) From rtoy at common-lisp.net Fri Nov 23 03:42:25 2007 From: rtoy at common-lisp.net (rtoy) Date: Thu, 22 Nov 2007 22:42:25 -0500 (EST) Subject: [oct-cvs] Oct commit: oct qd-rep.lisp Message-ID: <20071123034225.115EB4C001@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv789 Modified Files: qd-rep.lisp Log Message: Don't put initializers for the optional arg in the define-compiler-macro. This causes the same initializer object to be used everywhere. Instead, if no optional arg is given, call the initializer in the expansion. This fixes some issues with Allegro, and probably all other Lisps that use don't have complex double-double-float objects. --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/07 21:38:10 1.11 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/23 03:42:24 1.12 @@ -243,8 +243,10 @@ `(setf ,c (,',qd-t ,a ,b nil)) `(,',qd-t ,a ,b nil))) #-cmu - `(define-compiler-macro ,qd (a b &optional (c (%make-qd-d 0d0 0d0 0d0 0d0))) - `(,',qd-t ,a ,b ,c)))) + `(define-compiler-macro ,qd (a b &optional c) + (if c + `(,',qd-t ,a ,b ,c) + `(,',qd-t ,a ,b (%make-qd-d 0d0 0d0 0d0 0d0)))))) (frob add-qd add-qd-t) (frob mul-qd mul-qd-t) (frob div-qd div-qd-t) @@ -258,8 +260,10 @@ `(add-qd-t ,a (neg-qd ,b) nil))) #-cmu -(define-compiler-macro sub-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-qd-t ,a (neg-qd ,b) ,c)) +(define-compiler-macro sub-qd (a b &optional c) + (if c + `(add-qd-t ,a (neg-qd ,b) ,c) + `(add-qd-t ,a (neg-qd ,b) (%make-qd-d 0d0 0d0 0d0 0d0)))) #+cmu (define-compiler-macro sqr-qd (a &optional c) @@ -268,8 +272,10 @@ `(sqr-qd-t ,a nil))) #-cmu -(define-compiler-macro sqr-qd (a &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(sqr-qd-t ,a ,c)) +(define-compiler-macro sqr-qd (a &optional c) + (if c + `(sqr-qd-t ,a ,c) + `(sqr-qd-t ,a (%make-qd-d 0d0 0d0 0d0 0d0)))) #+cmu (define-compiler-macro add-d-qd (a b &optional c) @@ -278,8 +284,10 @@ `(add-qd-d ,b ,a))) #-cmu -(define-compiler-macro add-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-qd-d ,b ,a ,c)) +(define-compiler-macro add-d-qd (a b &optional c) + (if c + `(add-qd-d ,b ,a ,c) + `(add-qd-d ,b ,a (%make-qd-d 0d0 0d0 0d0 0d0)))) #+cmu (define-compiler-macro sub-qd-d (a b &optional c) @@ -288,8 +296,10 @@ `(add-qd-d ,a (cl:- ,b)))) #-cmu -(define-compiler-macro sub-qd-d (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-qd-d ,a (cl:- ,b) ,c)) +(define-compiler-macro sub-qd-d (a b &optional c) + (if c + `(add-qd-d ,a (cl:- ,b) ,c) + `(add-qd-d ,a (cl:- ,b) (%make-qd-d 0d0 0d0 0d0 0d0)))) #+cmu (define-compiler-macro sub-d-qd (a b &optional c) @@ -298,8 +308,10 @@ `(add-d-qd ,a (neg-qd ,b)))) #-cmu -(define-compiler-macro sub-d-qd (a b &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(add-d-qd ,a (neg-qd ,b) ,c)) +(define-compiler-macro sub-d-qd (a b &optional c) + (if c + `(add-d-qd ,a (neg-qd ,b) ,c) + `(add-d-qd ,a (neg-qd ,b) (%make-qd-d 0d0 0d0 0d0 0d0)))) #+cmu (define-compiler-macro neg-qd (a &optional c) @@ -308,6 +320,8 @@ `(neg-qd-t ,a nil))) #-cmu -(define-compiler-macro neg-qd (a &optional (c #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) - `(neg-qd-t ,a ,c)) +(define-compiler-macro neg-qd (a &optional c) + (if c + `(neg-qd-t ,a ,c) + `(neg-qd-t ,a (%make-qd-d 0d0 0d0 0d0 0d0)))) From rtoy at common-lisp.net Wed Nov 28 20:00:29 2007 From: rtoy at common-lisp.net (rtoy) Date: Wed, 28 Nov 2007 15:00:29 -0500 (EST) Subject: [oct-cvs] Oct commit: oct oct.system qd-fun.lisp qd-rep.lisp qd.lisp Message-ID: <20071128200029.3F63C2B129@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv14440 Modified Files: oct.system qd-fun.lisp qd-rep.lisp qd.lisp Log Message: Adjust code so that CMUCL can use arrays to store quad-doubles instead of using a (complex double-double-float). With these changes, CMUCL uses arrays and (rt:do-tests) passes successfully. oct.system: o Push :oct-array onto *FEATURES* to use arrays. This is the default if not building on CMUCL. qd-fun.lisp: o Fix two erroneous uses of zerop on a quad-double in sinh-qd and tanh-qd. o Fix two erroneous uses of + on %quad-double; they should have used ADD-QD instead. qd-rep.lisp: o Change conditionalization to allow arrays for CMUCL. o Update compiler macros appropriately. qd.lisp: o Adjust optional target arg appropriately for oct-array feature. o Clean up IGNORE declarations. o Add some more declarations for the target to make CMUCL happier. --- /project/oct/cvsroot/oct/oct.system 2007/09/16 05:00:00 1.22 +++ /project/oct/cvsroot/oct/oct.system 2007/11/28 20:00:28 1.23 @@ -44,6 +44,16 @@ (eval-when (:load-toplevel :compile-toplevel :execute) (setf ext:*inline-expansion-limit* 1600)) +;; +;; For all Lisps other than CMUCL, oct uses arrays to store the +;; quad-double values. This is denoted by the feature :oct-array. +;; For CMUCL, quad-doubles can be stored in a (complex +;; double-double-float) object, which is an extension in CMUCL. +;; If you want CMUCL to use an array too, add :oct-array to *features*. +;;#-cmu +(pushnew :oct-array *features*) + + (mk:defsystem oct :source-pathname (make-pathname :directory (pathname-directory *load-pathname*)) :components --- /project/oct/cvsroot/oct/qd-fun.lisp 2007/11/07 21:38:10 1.91 +++ /project/oct/cvsroot/oct/qd-fun.lisp 2007/11/28 20:00:28 1.92 @@ -1140,7 +1140,7 @@ (declare (type %quad-double a)) ;; Hart et al. suggests sinh(x) = 1/2*(D(x) + D(x)/(D(x)+1)) ;; where D(x) = exp(x) - 1. This helps for x near 0. - (cond ((zerop a) + (cond ((zerop (qd-0 a)) a) ((float-infinity-p (qd-0 a)) a) @@ -1166,7 +1166,7 @@ "Tanh(a)" (declare (type %quad-double a)) ;; Hart et al. suggests tanh(x) = D(2*x)/(2+D(2*x)) - (cond ((zerop a) + (cond ((zerop (qd-0 a)) a) ((> (abs (qd-0 a)) (/ (+ (log most-positive-double-float) (log 2d0)) @@ -1262,8 +1262,8 @@ (if (minusp-qd a) (neg-qd (asinh-qd (neg-qd a))) (let ((1/a (div-qd (make-qd-d 1d0) a))) - (+ (log-qd a) - (log1p-qd (sqrt-qd (add-qd-d (sqr-qd 1/a) 1d0))))))))) + (add-qd (log-qd a) + (log1p-qd (sqrt-qd (add-qd-d (sqr-qd 1/a) 1d0))))))))) (defun acosh-qd (a) "Acosh(a)" @@ -1297,9 +1297,9 @@ a) (t (let ((1/a (div-qd (make-qd-d 1d0) a))) - (+ (log-qd a) - (log1p-qd (mul-qd (sqrt-qd (sub-d-qd 1d0 1/a)) - (sqrt-qd (add-d-qd 1d0 1/a))))))))) + (add-qd (log-qd a) + (log1p-qd (mul-qd (sqrt-qd (sub-d-qd 1d0 1/a)) + (sqrt-qd (add-d-qd 1d0 1/a))))))))) (defun atanh-qd (a) "Atanh(a)" --- /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/23 03:42:24 1.12 +++ /project/oct/cvsroot/oct/qd-rep.lisp 2007/11/28 20:00:28 1.13 @@ -35,11 +35,13 @@ ;;; return all four values at once. ;; All of the following functions should be inline to reduce consing. +#+(and cmu (not oct-array)) (declaim (inline qd-0 qd-1 qd-2 qd-3 %make-qd-d qd-parts)) -#+cmu + +#+(and cmu (not oct-array)) (progn ;; For CMUCL (at least recent enough versions that support ;; double-double-float), we can use a (complex double-double-float) to @@ -98,7 +100,7 @@ ) ; end progn -#-cmu +#+oct-array (progn ;; For Lisp's without a double-double-float type, I think the best we ;; can do is a simple-array of four double-floats. Even with @@ -175,6 +177,7 @@ (defmacro %store-qd-d (target q0 q1 q2 q3) (let ((dest (gensym "TARGET-"))) `(let ((,dest ,target)) + (declare (type %quad-double ,dest)) (setf (aref ,dest 0) ,q0) (setf (aref ,dest 1) ,q1) (setf (aref ,dest 2) ,q2) @@ -237,12 +240,12 @@ (macrolet ((frob (qd qd-t) - #+cmu + #-oct-array `(define-compiler-macro ,qd (a b &optional c) (if c `(setf ,c (,',qd-t ,a ,b nil)) `(,',qd-t ,a ,b nil))) - #-cmu + #+oct-array `(define-compiler-macro ,qd (a b &optional c) (if c `(,',qd-t ,a ,b ,c) @@ -253,73 +256,73 @@ (frob add-qd-d add-qd-d-t) (frob mul-qd-d mul-qd-d-t)) -#+cmu +#+(and cmu (not oct-array)) (define-compiler-macro sub-qd (a b &optional c) (if c `(setf ,c (add-qd-t ,a (neg-qd ,b) nil)) `(add-qd-t ,a (neg-qd ,b) nil))) -#-cmu +#-(and cmu (not oct-array)) (define-compiler-macro sub-qd (a b &optional c) (if c `(add-qd-t ,a (neg-qd ,b) ,c) `(add-qd-t ,a (neg-qd ,b) (%make-qd-d 0d0 0d0 0d0 0d0)))) -#+cmu +#+(and cmu (not oct-array)) (define-compiler-macro sqr-qd (a &optional c) (if c `(setf ,c (sqr-qd-t ,a nil)) `(sqr-qd-t ,a nil))) -#-cmu +#-(and cmu (not oct-array)) (define-compiler-macro sqr-qd (a &optional c) (if c `(sqr-qd-t ,a ,c) `(sqr-qd-t ,a (%make-qd-d 0d0 0d0 0d0 0d0)))) -#+cmu +#+(and cmu (not oct-array)) (define-compiler-macro add-d-qd (a b &optional c) (if c `(setf ,c (add-qd-d ,b ,a)) `(add-qd-d ,b ,a))) -#-cmu +#-(and cmu (not oct-array)) (define-compiler-macro add-d-qd (a b &optional c) (if c `(add-qd-d ,b ,a ,c) `(add-qd-d ,b ,a (%make-qd-d 0d0 0d0 0d0 0d0)))) -#+cmu +#+(and cmu (not oct-array)) (define-compiler-macro sub-qd-d (a b &optional c) (if c `(setf ,c (add-qd-d ,a (cl:- ,b))) `(add-qd-d ,a (cl:- ,b)))) -#-cmu +#-(and cmu (not oct-array)) (define-compiler-macro sub-qd-d (a b &optional c) (if c `(add-qd-d ,a (cl:- ,b) ,c) `(add-qd-d ,a (cl:- ,b) (%make-qd-d 0d0 0d0 0d0 0d0)))) -#+cmu +#+(and cmu (not oct-array)) (define-compiler-macro sub-d-qd (a b &optional c) (if c `(setf ,c (add-d-qd ,a (neg-qd ,b))) `(add-d-qd ,a (neg-qd ,b)))) -#-cmu +#-(and cmu (not oct-array)) (define-compiler-macro sub-d-qd (a b &optional c) (if c `(add-d-qd ,a (neg-qd ,b) ,c) `(add-d-qd ,a (neg-qd ,b) (%make-qd-d 0d0 0d0 0d0 0d0)))) -#+cmu +#+(and cmu (not oct-array)) (define-compiler-macro neg-qd (a &optional c) (if c `(setf ,c (neg-qd-t ,a nil)) `(neg-qd-t ,a nil))) -#-cmu +#-(and cmu (not oct-array)) (define-compiler-macro neg-qd (a &optional c) (if c `(neg-qd-t ,a ,c) --- /project/oct/cvsroot/oct/qd.lisp 2007/11/16 19:44:06 1.63 +++ /project/oct/cvsroot/oct/qd.lisp 2007/11/28 20:00:28 1.64 @@ -306,17 +306,17 @@ ;;;; Addition ;; Quad-double + double -(defun add-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun add-qd-d (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (add-qd-d-t a b target)) (defun add-qd-d-t (a b target) "Add a quad-double A and a double-float B" - (declare (type %quad-double a) + (declare (type %quad-double a target) (double-float b) (optimize (speed 3) (space 0)) (inline float-infinity-p) - #+cmu (ignore target)) + #+(and cmu (not oct-array)) (ignore target)) (let* ((c0 0d0) (e c0) (c1 c0) @@ -336,12 +336,12 @@ (%store-qd-d target c0 0d0 0d0 0d0) (%store-qd-d target r0 r1 r2 r3))))) -(defun add-d-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun add-d-qd (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (double-float a) (type %quad-double b) (optimize (speed 3)) #+cmu (ignore target)) - (add-qd-d b a #-cmu target)) + (add-qd-d b a #+oct-array target)) #+cmu (defun add-qd-dd (a b) @@ -403,15 +403,15 @@ ;; which don't do a very good job with dataflow. CMUCL is one of ;; those compilers. -(defun add-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun add-qd (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (add-qd-t a b target)) (defun add-qd-t (a b target) - (declare (type %quad-double a b) + (declare (type %quad-double a b target) (optimize (speed 3) (space 0)) - #+cmu + #+(and cmu (not oct-array)) (ignore target)) ;; This is the version that is NOT IEEE. Should we use the IEEE ;; version? It's quite a bit more complicated. @@ -472,18 +472,18 @@ ;; directly. For CMU, we always replace the parameter C with NIL ;; because we don't use it. For other Lisps, we create the necessary ;; object and call add-qd-t. -(defun neg-qd (a &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun neg-qd (a &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (neg-qd-t a target)) (defun neg-qd-t (a target) - (declare (type %quad-double a) - #+cmu (ignore target)) + (declare (type %quad-double a target) + #+(and cmu (not oct-array)) (ignore target)) (with-qd-parts (a0 a1 a2 a3) a (declare (double-float a0 a1 a2 a3)) (%store-qd-d target (cl:- a0) (cl:- a1) (cl:- a2) (cl:- a3)))) -(defun sub-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun sub-qd (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a b)) (add-qd-t a (neg-qd b) target)) @@ -493,18 +493,18 @@ (type double-double-float b)) (add-qd-dd a (cl:- b))) -(defun sub-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun sub-qd-d (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type %quad-double a) (type double-float b) #+cmu (ignore target)) - (add-qd-d a (cl:- b) #-cmu target)) + (add-qd-d a (cl:- b) #+oct-array target)) -(defun sub-d-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun sub-d-qd (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (declare (type double-float a) (type %quad-double b) - #+cmu (ignore target)) + #+(and cmu (not oct-array)) (ignore target)) ;; a - b = a + (-b) - (add-d-qd a (neg-qd b) #-cmu target)) + (add-d-qd a (neg-qd b) #+oct-array target)) ;; Works @@ -514,17 +514,17 @@ ;; Clisp says ;; 14.142135623730950488016887242096980785696718753769480731766797379908L0 ;; -(defun mul-qd-d (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun mul-qd-d (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (mul-qd-d-t a b target)) (defun mul-qd-d-t (a b target) "Multiply quad-double A with B" - (declare (type %quad-double a) + (declare (type %quad-double a target) (double-float b) (optimize (speed 3) (space 0)) (inline float-infinity-p) - #+cmu (ignore target)) + #+(and cmu (not oct-array)) (ignore target)) (multiple-value-bind (p0 q0) (two-prod (qd-0 a) b) @@ -641,15 +641,15 @@ ;; Clisp says ;; 14.142135623730950488016887242096980785696718753769480731766797379908L0 -(defun mul-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun mul-qd (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (mul-qd-t a b target)) (defun mul-qd-t (a b target) - (declare (type %quad-double a b) + (declare (type %quad-double a b target) (optimize (speed 3) (space 0)) (inline float-infinity-p) - #+cmu + #+(and cmu (not oct-array)) (ignore target)) (with-qd-parts (a0 a1 a2 a3) a @@ -811,15 +811,15 @@ (multiple-value-call #'%make-qd-d (renorm-5 p0 p1 s0 t0 t1)))))))))))))))))))) -(defun sqr-qd (a &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun sqr-qd (a &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (sqr-qd-t a target)) (defun sqr-qd-t (a target) "Square A" - (declare (type %quad-double a) + (declare (type %quad-double a target) (optimize (speed 3) (space 0)) - #+cmu + #+(and cmu (not oct-array)) (ignore target)) (multiple-value-bind (p0 q0) (two-sqr (qd-0 a)) @@ -863,7 +863,7 @@ (%store-qd-d target a0 a1 a2 a3))))))))) -(defun div-qd (a b &optional (target #-cmu (%make-qd-d 0d0 0d0 0d0 0d0))) +(defun div-qd (a b &optional (target #+oct-array (%make-qd-d 0d0 0d0 0d0 0d0))) (div-qd-t a b target)) #+nil @@ -891,11 +891,11 @@ (%store-qd-d target q0 q1 q2 q3))))))) (defun div-qd-t (a b target) - (declare (type %quad-double a b) + (declare (type %quad-double a b target) (optimize (speed 3) (space 0)) (inline float-infinity-p) - #+cmu + #+(and cmu (not oct-array)) (ignore target)) (let ((a0 (qd-0 a)) (b0 (qd-0 b))) From rtoy at common-lisp.net Wed Nov 28 21:41:27 2007 From: rtoy at common-lisp.net (rtoy) Date: Wed, 28 Nov 2007 16:41:27 -0500 (EST) Subject: [oct-cvs] Oct commit: oct oct.system Message-ID: <20071128214127.8A2E71D13C@common-lisp.net> Update of /project/oct/cvsroot/oct In directory clnet:/tmp/cvs-serv7343 Modified Files: oct.system Log Message: Make default for CMUCL not to use arrays. Some quick tests show that currently arrays are slower or much slower than complex double-doubles. --- /project/oct/cvsroot/oct/oct.system 2007/11/28 20:00:28 1.23 +++ /project/oct/cvsroot/oct/oct.system 2007/11/28 21:41:27 1.24 @@ -50,7 +50,7 @@ ;; For CMUCL, quad-doubles can be stored in a (complex ;; double-double-float) object, which is an extension in CMUCL. ;; If you want CMUCL to use an array too, add :oct-array to *features*. -;;#-cmu +#-cmu (pushnew :oct-array *features*)