RTAI API: base/math/e_sqrt.c Source File

00001 /* @(#)e_sqrt.c 5.1 93/09/24 */
00002 /*
00003  * ====================================================
00004  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
00005  *
00006  * Developed at SunPro, a Sun Microsystems, Inc. business.
00007  * Permission to use, copy, modify, and distribute this
00008  * software is freely granted, provided that this notice 
00009  * is preserved.
00010  * ====================================================
00011  */
00012 
00013 #if defined(LIBM_SCCS) && !defined(lint)
00014 static char rcsid[] = "$NetBSD: e_sqrt.c,v 1.8 1995/05/10 20:46:17 jtc Exp $";
00015 #endif
00016 
00017 /* __ieee754_sqrt(x)
00018  * Return correctly rounded sqrt.
00019  *           ------------------------------------------
00020  *       |  Use the hardware sqrt if you have one |
00021  *           ------------------------------------------
00022  * Method: 
00023  *   Bit by bit method using integer arithmetic. (Slow, but portable) 
00024  *   1. Normalization
00025  *  Scale x to y in [1,4) with even powers of 2: 
00026  *  find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
00027  *      sqrt(x) = 2^k * sqrt(y)
00028  *   2. Bit by bit computation
00029  *  Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
00030  *       i                           0
00031  *                                     i+1         2
00032  *      s  = 2*q , and  y  =  2   * ( y - q  ).     (1)
00033  *       i      i            i                 i
00034  *                                                        
00035  *  To compute q    from q , one checks whether 
00036  *          i+1       i                       
00037  *
00038  *                -(i+1) 2
00039  *          (q + 2      ) <= y.         (2)
00040  *                i
00041  *                                -(i+1)
00042  *  If (2) is false, then q   = q ; otherwise q   = q  + 2      .
00043  *                 i+1   i             i+1   i
00044  *
00045  *  With some algebric manipulation, it is not difficult to see
00046  *  that (2) is equivalent to 
00047  *                             -(i+1)
00048  *          s  +  2       <= y          (3)
00049  *           i                i
00050  *
00051  *  The advantage of (3) is that s  and y  can be computed by 
00052  *                    i      i
00053  *  the following recurrence formula:
00054  *      if (3) is false
00055  *
00056  *      s     =  s  ,   y    = y   ;            (4)
00057  *       i+1      i      i+1    i
00058  *
00059  *      otherwise,
00060  *                         -i                     -(i+1)
00061  *      s     =  s  + 2  ,  y    = y  -  s  - 2         (5)
00062  *           i+1      i          i+1    i     i
00063  *              
00064  *  One may easily use induction to prove (4) and (5). 
00065  *  Note. Since the left hand side of (3) contain only i+2 bits,
00066  *        it does not necessary to do a full (53-bit) comparison 
00067  *        in (3).
00068  *   3. Final rounding
00069  *  After generating the 53 bits result, we compute one more bit.
00070  *  Together with the remainder, we can decide whether the
00071  *  result is exact, bigger than 1/2ulp, or less than 1/2ulp
00072  *  (it will never equal to 1/2ulp).
00073  *  The rounding mode can be detected by checking whether
00074  *  huge + tiny is equal to huge, and whether huge - tiny is
00075  *  equal to huge for some floating point number "huge" and "tiny".
00076  *      
00077  * Special cases:
00078  *  sqrt(+-0) = +-0     ... exact
00079  *  sqrt(inf) = inf
00080  *  sqrt(-ve) = NaN     ... with invalid signal
00081  *  sqrt(NaN) = NaN     ... with invalid signal for signaling NaN
00082  *
00083  * Other methods : see the appended file at the end of the program below.
00084  *---------------
00085  */
00086 
00087 #include "math.h"
00088 #include "mathP.h"
00089 
00090 #ifdef __STDC__
00091 static  const double    one = 1.0, tiny=1.0e-300;
00092 #else
00093 static  double  one = 1.0, tiny=1.0e-300;
00094 #endif
00095 
00096 #ifdef __STDC__
00097     double __ieee754_sqrt(double x)
00098 #else
00099     double __ieee754_sqrt(x)
00100     double x;
00101 #endif
00102 {
00103     double z;
00104     int32_t sign = (int)0x80000000; 
00105     int32_t ix0,s0,q,m,t,i;
00106     u_int32_t r,t1,s1,ix1,q1;
00107 
00108     EXTRACT_WORDS(ix0,ix1,x);
00109 
00110     /* take care of Inf and NaN */
00111     if((ix0&0x7ff00000)==0x7ff00000) {          
00112         return x*x+x;       /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
00113                        sqrt(-inf)=sNaN */
00114     } 
00115     /* take care of zero */
00116     if(ix0<=0) {
00117         if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
00118         else if(ix0<0)
00119         return (x-x)/(x-x);     /* sqrt(-ve) = sNaN */
00120     }
00121     /* normalize x */
00122     m = (ix0>>20);
00123     if(m==0) {              /* subnormal x */
00124         while(ix0==0) {
00125         m -= 21;
00126         ix0 |= (ix1>>11); ix1 <<= 21;
00127         }
00128         for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
00129         m -= i-1;
00130         ix0 |= (ix1>>(32-i));
00131         ix1 <<= i;
00132     }
00133     m -= 1023;  /* unbias exponent */
00134     ix0 = (ix0&0x000fffff)|0x00100000;
00135     if(m&1){    /* odd m, double x to make it even */
00136         ix0 += ix0 + ((ix1&sign)>>31);
00137         ix1 += ix1;
00138     }
00139     m >>= 1;    /* m = [m/2] */
00140 
00141     /* generate sqrt(x) bit by bit */
00142     ix0 += ix0 + ((ix1&sign)>>31);
00143     ix1 += ix1;
00144     q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
00145     r = 0x00200000;     /* r = moving bit from right to left */
00146 
00147     while(r!=0) {
00148         t = s0+r; 
00149         if(t<=ix0) { 
00150         s0   = t+r; 
00151         ix0 -= t; 
00152         q   += r; 
00153         } 
00154         ix0 += ix0 + ((ix1&sign)>>31);
00155         ix1 += ix1;
00156         r>>=1;
00157     }
00158 
00159     r = sign;
00160     while(r!=0) {
00161         t1 = s1+r; 
00162         t  = s0;
00163         if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 
00164         s1  = t1+r;
00165         if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
00166         ix0 -= t;
00167         if (ix1 < t1) ix0 -= 1;
00168         ix1 -= t1;
00169         q1  += r;
00170         }
00171         ix0 += ix0 + ((ix1&sign)>>31);
00172         ix1 += ix1;
00173         r>>=1;
00174     }
00175 
00176     /* use floating add to find out rounding direction */
00177     if((ix0|ix1)!=0) {
00178         z = one-tiny; /* trigger inexact flag */
00179         if (z>=one) {
00180             z = one+tiny;
00181             if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
00182         else if (z>one) {
00183             if (q1==(u_int32_t)0xfffffffe) q+=1;
00184             q1+=2; 
00185         } else
00186                 q1 += (q1&1);
00187         }
00188     }
00189     ix0 = (q>>1)+0x3fe00000;
00190     ix1 =  q1>>1;
00191     if ((q&1)==1) ix1 |= sign;
00192     ix0 += (m <<20);
00193     INSERT_WORDS(z,ix0,ix1);
00194     return z;
00195 }
00196 
00197 /*
00198 Other methods  (use floating-point arithmetic)
00199 -------------
00200 (This is a copy of a drafted paper by Prof W. Kahan 
00201 and K.C. Ng, written in May, 1986)
00202 
00203     Two algorithms are given here to implement sqrt(x) 
00204     (IEEE double precision arithmetic) in software.
00205     Both supply sqrt(x) correctly rounded. The first algorithm (in
00206     Section A) uses newton iterations and involves four divisions.
00207     The second one uses reciproot iterations to avoid division, but
00208     requires more multiplications. Both algorithms need the ability
00209     to chop results of arithmetic operations instead of round them, 
00210     and the INEXACT flag to indicate when an arithmetic operation
00211     is executed exactly with no roundoff error, all part of the 
00212     standard (IEEE 754-1985). The ability to perform shift, add,
00213     subtract and logical AND operations upon 32-bit words is needed
00214     too, though not part of the standard.
00215 
00216 A.  sqrt(x) by Newton Iteration
00217 
00218    (1)  Initial approximation
00219 
00220     Let x0 and x1 be the leading and the trailing 32-bit words of
00221     a floating point number x (in IEEE double format) respectively 
00222 
00223         1    11          52               ...widths
00224        ------------------------------------------------------
00225     x: |s|    e     |         f             |
00226        ------------------------------------------------------
00227           msb    lsb  msb                     lsb ...order
00228 
00229  
00230          ------------------------        ------------------------
00231     x0:  |s|   e    |    f1     |    x1: |          f2           |
00232          ------------------------        ------------------------
00233 
00234     By performing shifts and subtracts on x0 and x1 (both regarded
00235     as integers), we obtain an 8-bit approximation of sqrt(x) as
00236     follows.
00237 
00238         k  := (x0>>1) + 0x1ff80000;
00239         y0 := k - T1[31&(k>>15)].   ... y ~ sqrt(x) to 8 bits
00240     Here k is a 32-bit integer and T1[] is an integer array containing
00241     correction terms. Now magically the floating value of y (y's
00242     leading 32-bit word is y0, the value of its trailing word is 0)
00243     approximates sqrt(x) to almost 8-bit.
00244 
00245     Value of T1:
00246     static int T1[32]= {
00247     0,  1024,   3062,   5746,   9193,   13348,  18162,  23592,
00248     29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
00249     83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
00250     16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
00251 
00252     (2) Iterative refinement
00253 
00254     Apply Heron's rule three times to y, we have y approximates 
00255     sqrt(x) to within 1 ulp (Unit in the Last Place):
00256 
00257         y := (y+x/y)/2      ... almost 17 sig. bits
00258         y := (y+x/y)/2      ... almost 35 sig. bits
00259         y := y-(y-x/y)/2    ... within 1 ulp
00260 
00261 
00262     Remark 1.
00263         Another way to improve y to within 1 ulp is:
00264 
00265         y := (y+x/y)        ... almost 17 sig. bits to 2*sqrt(x)
00266         y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x)
00267 
00268                 2
00269                 (x-y )*y
00270         y := y + 2* ----------  ...within 1 ulp
00271                    2
00272                  3y  + x
00273 
00274 
00275     This formula has one division fewer than the one above; however,
00276     it requires more multiplications and additions. Also x must be
00277     scaled in advance to avoid spurious overflow in evaluating the
00278     expression 3y*y+x. Hence it is not recommended uless division
00279     is slow. If division is very slow, then one should use the 
00280     reciproot algorithm given in section B.
00281 
00282     (3) Final adjustment
00283 
00284     By twiddling y's last bit it is possible to force y to be 
00285     correctly rounded according to the prevailing rounding mode
00286     as follows. Let r and i be copies of the rounding mode and
00287     inexact flag before entering the square root program. Also we
00288     use the expression y+-ulp for the next representable floating
00289     numbers (up and down) of y. Note that y+-ulp = either fixed
00290     point y+-1, or multiply y by nextafter(1,+-inf) in chopped
00291     mode.
00292 
00293         I := FALSE; ... reset INEXACT flag I
00294         R := RZ;    ... set rounding mode to round-toward-zero
00295         z := x/y;   ... chopped quotient, possibly inexact
00296         If(not I) then {    ... if the quotient is exact
00297             if(z=y) {
00298                 I := i;  ... restore inexact flag
00299                 R := r;  ... restore rounded mode
00300                 return sqrt(x):=y.
00301             } else {
00302             z := z - ulp;   ... special rounding
00303             }
00304         }
00305         i := TRUE;      ... sqrt(x) is inexact
00306         If (r=RN) then z=z+ulp  ... rounded-to-nearest
00307         If (r=RP) then {    ... round-toward-+inf
00308             y = y+ulp; z=z+ulp;
00309         }
00310         y := y+z;       ... chopped sum
00311         y0:=y0-0x00100000;  ... y := y/2 is correctly rounded.
00312             I := i;         ... restore inexact flag
00313             R := r;         ... restore rounded mode
00314             return sqrt(x):=y.
00315             
00316     (4) Special cases
00317 
00318     Square root of +inf, +-0, or NaN is itself;
00319     Square root of a negative number is NaN with invalid signal.
00320 
00321 
00322 B.  sqrt(x) by Reciproot Iteration
00323 
00324    (1)  Initial approximation
00325 
00326     Let x0 and x1 be the leading and the trailing 32-bit words of
00327     a floating point number x (in IEEE double format) respectively
00328     (see section A). By performing shifs and subtracts on x0 and y0,
00329     we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
00330 
00331         k := 0x5fe80000 - (x0>>1);
00332         y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
00333 
00334     Here k is a 32-bit integer and T2[] is an integer array 
00335     containing correction terms. Now magically the floating
00336     value of y (y's leading 32-bit word is y0, the value of
00337     its trailing word y1 is set to zero) approximates 1/sqrt(x)
00338     to almost 7.8-bit.
00339 
00340     Value of T2:
00341     static int T2[64]= {
00342     0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
00343     0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
00344     0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
00345     0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
00346     0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
00347     0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
00348     0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
00349     0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
00350 
00351     (2) Iterative refinement
00352 
00353     Apply Reciproot iteration three times to y and multiply the
00354     result by x to get an approximation z that matches sqrt(x)
00355     to about 1 ulp. To be exact, we will have 
00356         -1ulp < sqrt(x)-z<1.0625ulp.
00357     
00358     ... set rounding mode to Round-to-nearest
00359        y := y*(1.5-0.5*x*y*y)   ... almost 15 sig. bits to 1/sqrt(x)
00360        y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
00361     ... special arrangement for better accuracy
00362        z := x*y         ... 29 bits to sqrt(x), with z*y<1
00363        z := z + 0.5*z*(1-z*y)   ... about 1 ulp to sqrt(x)
00364 
00365     Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
00366     (a) the term z*y in the final iteration is always less than 1; 
00367     (b) the error in the final result is biased upward so that
00368         -1 ulp < sqrt(x) - z < 1.0625 ulp
00369         instead of |sqrt(x)-z|<1.03125ulp.
00370 
00371     (3) Final adjustment
00372 
00373     By twiddling y's last bit it is possible to force y to be 
00374     correctly rounded according to the prevailing rounding mode
00375     as follows. Let r and i be copies of the rounding mode and
00376     inexact flag before entering the square root program. Also we
00377     use the expression y+-ulp for the next representable floating
00378     numbers (up and down) of y. Note that y+-ulp = either fixed
00379     point y+-1, or multiply y by nextafter(1,+-inf) in chopped
00380     mode.
00381 
00382     R := RZ;        ... set rounding mode to round-toward-zero
00383     switch(r) {
00384         case RN:        ... round-to-nearest
00385            if(x<= z*(z-ulp)...chopped) z = z - ulp; else
00386            if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
00387            break;
00388         case RZ:case RM:    ... round-to-zero or round-to--inf
00389            R:=RP;       ... reset rounding mod to round-to-+inf
00390            if(x<z*z ... rounded up) z = z - ulp; else
00391            if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
00392            break;
00393         case RP:        ... round-to-+inf
00394            if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
00395            if(x>z*z ...chopped) z = z+ulp;
00396            break;
00397     }
00398 
00399     Remark 3. The above comparisons can be done in fixed point. For
00400     example, to compare x and w=z*z chopped, it suffices to compare
00401     x1 and w1 (the trailing parts of x and w), regarding them as
00402     two's complement integers.
00403 
00404     ...Is z an exact square root?
00405     To determine whether z is an exact square root of x, let z1 be the
00406     trailing part of z, and also let x0 and x1 be the leading and
00407     trailing parts of x.
00408 
00409     If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
00410         I := 1;     ... Raise Inexact flag: z is not exact
00411     else {
00412         j := 1 - [(x0>>20)&1]   ... j = logb(x) mod 2
00413         k := z1 >> 26;      ... get z's 25-th and 26-th 
00414                         fraction bits
00415         I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
00416     }
00417     R:= r       ... restore rounded mode
00418     return sqrt(x):=z.
00419 
00420     If multiplication is cheaper then the foregoing red tape, the 
00421     Inexact flag can be evaluated by
00422 
00423         I := i;
00424         I := (z*z!=x) or I.
00425 
00426     Note that z*z can overwrite I; this value must be sensed if it is 
00427     True.
00428 
00429     Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
00430     zero.
00431 
00432             --------------------
00433         z1: |        f2        | 
00434             --------------------
00435         bit 31         bit 0
00436 
00437     Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
00438     or even of logb(x) have the following relations:
00439 
00440     -------------------------------------------------
00441     bit 27,26 of z1     bit 1,0 of x1   logb(x)
00442     -------------------------------------------------
00443     00          00      odd and even
00444     01          01      even
00445     10          10      odd
00446     10          00      even
00447     11          01      even
00448     -------------------------------------------------
00449 
00450     (4) Special cases (see (4) of Section A).   
00451  
00452  */
00453