libpok-refman-current-html/html/e__sqrt_8c_source.html

00001 /*
00002  *                               POK header
00003  *
00004  * The following file is a part of the POK project. Any modification should
00005  * made according to the POK licence. You CANNOT use this file or a part of
00006  * this file is this part of a file for your own project
00007  *
00008  * For more information on the POK licence, please see our LICENCE FILE
00009  *
00010  * Please follow the coding guidelines described in doc/CODING_GUIDELINES
00011  *
00012  *                                      Copyright (c) 2007-2009 POK team
00013  *
00014  * Created by julien on Fri Jan 30 13:44:27 2009
00015  */
00016
00017 /* @(#)e_sqrt.c 5.1 93/09/24 */
00018 /*
00019  * ====================================================
00020  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
00021  *
00022  * Developed at SunPro, a Sun Microsystems, Inc. business.
00023  * Permission to use, copy, modify, and distribute this
00024  * software is freely granted, provided that this notice
00025  * is preserved.
00026  * ====================================================
00027  */
00028
00029 /* __ieee754_sqrt(x)
00030  * Return correctly rounded sqrt.
00031  *           ------------------------------------------
00032  *           |  Use the hardware sqrt if you have one |
00033  *           ------------------------------------------
00034  * Method:
00035  *   Bit by bit method using integer arithmetic. (Slow, but portable)
00036  *   1. Normalization
00037  *      Scale x to y in [1,4) with even powers of 2:
00038  *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
00039  *              sqrt(x) = 2^k * sqrt(y)
00040  *   2. Bit by bit computation
00041  *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
00042  *           i                                                   0
00043  *                                     i+1         2
00044  *          s  = 2*q , and      y  =  2   * ( y - q  ).         (1)
00045  *           i      i            i                 i
00046  *
00047  *      To compute q    from q , one checks whether
00048  *                  i+1       i
00049  *
00050  *                            -(i+1) 2
00051  *                      (q + 2      ) <= y.                     (2)
00052  *                        i
00053  *                                                            -(i+1)
00054  *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
00055  *                             i+1   i             i+1   i
00056  *
00057  *      With some algebric manipulation, it is not difficult to see
00058  *      that (2) is equivalent to
00059  *                             -(i+1)
00060  *                      s  +  2       <= y                      (3)
00061  *                       i                i
00062  *
00063  *      The advantage of (3) is that s  and y  can be computed by
00064  *                                    i      i
00065  *      the following recurrence formula:
00066  *          if (3) is false
00067  *
00068  *          s     =  s  ,       y    = y   ;                    (4)
00069  *           i+1      i          i+1    i
00070  *
00071  *          otherwise,
00072  *                         -i                     -(i+1)
00073  *          s     =  s  + 2  ,  y    = y  -  s  - 2             (5)
00074  *           i+1      i          i+1    i     i
00075  *
00076  *      One may easily use induction to prove (4) and (5).
00077  *      Note. Since the left hand side of (3) contain only i+2 bits,
00078  *            it does not necessary to do a full (53-bit) comparison
00079  *            in (3).
00080  *   3. Final rounding
00081  *      After generating the 53 bits result, we compute one more bit.
00082  *      Together with the remainder, we can decide whether the
00083  *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
00084  *      (it will never equal to 1/2ulp).
00085  *      The rounding mode can be detected by checking whether
00086  *      huge + tiny is equal to huge, and whether huge - tiny is
00087  *      equal to huge for some floating point number "huge" and "tiny".
00088  *
00089  * Special cases:
00090  *      sqrt(+-0) = +-0         ... exact
00091  *      sqrt(inf) = inf
00092  *      sqrt(-ve) = NaN         ... with invalid signal
00093  *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN
00094  *
00095  * Other methods : see the appended file at the end of the program below.
00096  *---------------
00097  */
00098
00099 #ifdef POK_NEEDS_LIBMATH
00100
00101 #include <types.h>
00102 #include "math_private.h"
00103
00104 static  const double    one     = 1.0, tiny=1.0e-300;
00105
00106 double
00107 __ieee754_sqrt(double x)
00108 {
00109         double z;
00110         int32_t sign = (int)0x80000000;
00111         int32_t ix0,s0,q,m,t,i;
00112         uint32_t r,t1,s1,ix1,q1;
00113
00114         EXTRACT_WORDS(ix0,ix1,x);
00115
00116     /* take care of Inf and NaN */
00117         if((ix0&0x7ff00000)==0x7ff00000) {
00118             return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
00119                                            sqrt(-inf)=sNaN */
00120         }
00121     /* take care of zero */
00122         if(ix0<=0) {
00123             if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
00124             else if(ix0<0)
00125                 return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */
00126         }
00127     /* normalize x */
00128         m = (ix0>>20);
00129         if(m==0) {                              /* subnormal x */
00130             while(ix0==0) {
00131                 m -= 21;
00132                 ix0 |= (ix1>>11); ix1 <<= 21;
00133             }
00134             for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
00135             m -= i-1;
00136             ix0 |= (ix1>>(32-i));
00137             ix1 <<= i;
00138         }
00139         m -= 1023;      /* unbias exponent */
00140         ix0 = (ix0&0x000fffff)|0x00100000;
00141         if(m&1){        /* odd m, double x to make it even */
00142             ix0 += ix0 + ((ix1&sign)>>31);
00143             ix1 += ix1;
00144         }
00145         m >>= 1;        /* m = [m/2] */
00146
00147     /* generate sqrt(x) bit by bit */
00148         ix0 += ix0 + ((ix1&sign)>>31);
00149         ix1 += ix1;
00150         q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */
00151         r = 0x00200000;         /* r = moving bit from right to left */
00152
00153         while(r!=0) {
00154             t = s0+r;
00155             if(t<=ix0) {
00156                 s0   = t+r;
00157                 ix0 -= t;
00158                 q   += r;
00159             }
00160             ix0 += ix0 + ((ix1&sign)>>31);
00161             ix1 += ix1;
00162             r>>=1;
00163         }
00164
00165         r = sign;
00166         while(r!=0) {
00167             t1 = s1+r;
00168             t  = s0;
00169             if((t<ix0)||((t==ix0)&&(t1<=ix1))) {
00170                 s1  = t1+r;
00171                 if(((t1&sign)==(uint32_t)sign)&&(s1&sign)==0) s0 += 1;
00172                 ix0 -= t;
00173                 if (ix1 < t1) ix0 -= 1;
00174                 ix1 -= t1;
00175                 q1  += r;
00176             }
00177             ix0 += ix0 + ((ix1&sign)>>31);
00178             ix1 += ix1;
00179             r>>=1;
00180         }
00181
00182     /* use floating add to find out rounding direction */
00183         if((ix0|ix1)!=0) {
00184             z = one-tiny; /* trigger inexact flag */
00185             if (z>=one) {
00186                 z = one+tiny;
00187                 if (q1==(uint32_t)0xffffffff) { q1=0; q += 1;}
00188                 else if (z>one) {
00189                     if (q1==(uint32_t)0xfffffffe) q+=1;
00190                     q1+=2;
00191                 } else
00192                     q1 += (q1&1);
00193             }
00194         }
00195         ix0 = (q>>1)+0x3fe00000;
00196         ix1 =  q1>>1;
00197         if ((q&1)==1) ix1 |= sign;
00198         ix0 += (m <<20);
00199         INSERT_WORDS(z,ix0,ix1);
00200         return z;
00201 }
00202
00203 /*
00204 Other methods  (use floating-point arithmetic)
00205 -------------
00206 (This is a copy of a drafted paper by Prof W. Kahan
00207 and K.C. Ng, written in May, 1986)
00208
00209         Two algorithms are given here to implement sqrt(x)
00210         (IEEE double precision arithmetic) in software.
00211         Both supply sqrt(x) correctly rounded. The first algorithm (in
00212         Section A) uses newton iterations and involves four divisions.
00213         The second one uses reciproot iterations to avoid division, but
00214         requires more multiplications. Both algorithms need the ability
00215         to chop results of arithmetic operations instead of round them,
00216         and the INEXACT flag to indicate when an arithmetic operation
00217         is executed exactly with no roundoff error, all part of the
00218         standard (IEEE 754-1985). The ability to perform shift, add,
00219         subtract and logical AND operations upon 32-bit words is needed
00220         too, though not part of the standard.
00221
00222 A.  sqrt(x) by Newton Iteration
00223
00224    (1)  Initial approximation
00225
00226         Let x0 and x1 be the leading and the trailing 32-bit words of
00227         a floating point number x (in IEEE double format) respectively
00228
00229             1    11                  52                           ...widths
00230            ------------------------------------------------------
00231         x: |s|    e     |             f                         |
00232            ------------------------------------------------------
00233               msb    lsb  msb                                 lsb ...order
00234
00235
00236              ------------------------        ------------------------
00237         x0:  |s|   e    |    f1     |    x1: |          f2           |
00238              ------------------------        ------------------------
00239
00240         By performing shifts and subtracts on x0 and x1 (both regarded
00241         as integers), we obtain an 8-bit approximation of sqrt(x) as
00242         follows.
00243
00244                 k  := (x0>>1) + 0x1ff80000;
00245                 y0 := k - T1[31&(k>>15)].       ... y ~ sqrt(x) to 8 bits
00246         Here k is a 32-bit integer and T1[] is an integer array containing
00247         correction terms. Now magically the floating value of y (y's
00248         leading 32-bit word is y0, the value of its trailing word is 0)
00249         approximates sqrt(x) to almost 8-bit.
00250
00251         Value of T1:
00252         static int T1[32]= {
00253         0,      1024,   3062,   5746,   9193,   13348,  18162,  23592,
00254         29598,  36145,  43202,  50740,  58733,  67158,  75992,  85215,
00255         83599,  71378,  60428,  50647,  41945,  34246,  27478,  21581,
00256         16499,  12183,  8588,   5674,   3403,   1742,   661,    130,};
00257
00258     (2) Iterative refinement
00259
00260         Apply Heron's rule three times to y, we have y approximates
00261         sqrt(x) to within 1 ulp (Unit in the Last Place):
00262
00263                 y := (y+x/y)/2          ... almost 17 sig. bits
00264                 y := (y+x/y)/2          ... almost 35 sig. bits
00265                 y := y-(y-x/y)/2        ... within 1 ulp
00266
00267
00268         Remark 1.
00269             Another way to improve y to within 1 ulp is:
00270
00271                 y := (y+x/y)            ... almost 17 sig. bits to 2*sqrt(x)
00272                 y := y - 0x00100006     ... almost 18 sig. bits to sqrt(x)
00273
00274                                 2
00275                             (x-y )*y
00276                 y := y + 2* ----------  ...within 1 ulp
00277                                2
00278                              3y  + x
00279
00280
00281         This formula has one division fewer than the one above; however,
00282         it requires more multiplications and additions. Also x must be
00283         scaled in advance to avoid spurious overflow in evaluating the
00284         expression 3y*y+x. Hence it is not recommended uless division
00285         is slow. If division is very slow, then one should use the
00286         reciproot algorithm given in section B.
00287
00288     (3) Final adjustment
00289
00290         By twiddling y's last bit it is possible to force y to be
00291         correctly rounded according to the prevailing rounding mode
00292         as follows. Let r and i be copies of the rounding mode and
00293         inexact flag before entering the square root program. Also we
00294         use the expression y+-ulp for the next representable floating
00295         numbers (up and down) of y. Note that y+-ulp = either fixed
00296         point y+-1, or multiply y by nextafter(1,+-inf) in chopped
00297         mode.
00298
00299                 I := FALSE;     ... reset INEXACT flag I
00300                 R := RZ;        ... set rounding mode to round-toward-zero
00301                 z := x/y;       ... chopped quotient, possibly inexact
00302                 If(not I) then {        ... if the quotient is exact
00303                     if(z=y) {
00304                         I := i;  ... restore inexact flag
00305                         R := r;  ... restore rounded mode
00306                         return sqrt(x):=y.
00307                     } else {
00308                         z := z - ulp;   ... special rounding
00309                     }
00310                 }
00311                 i := TRUE;              ... sqrt(x) is inexact
00312                 If (r=RN) then z=z+ulp  ... rounded-to-nearest
00313                 If (r=RP) then {        ... round-toward-+inf
00314                     y = y+ulp; z=z+ulp;
00315                 }
00316                 y := y+z;               ... chopped sum
00317                 y0:=y0-0x00100000;      ... y := y/2 is correctly rounded.
00318                 I := i;                 ... restore inexact flag
00319                 R := r;                 ... restore rounded mode
00320                 return sqrt(x):=y.
00321
00322     (4) Special cases
00323
00324         Square root of +inf, +-0, or NaN is itself;
00325         Square root of a negative number is NaN with invalid signal.
00326
00327
00328 B.  sqrt(x) by Reciproot Iteration
00329
00330    (1)  Initial approximation
00331
00332         Let x0 and x1 be the leading and the trailing 32-bit words of
00333         a floating point number x (in IEEE double format) respectively
00334         (see section A). By performing shifs and subtracts on x0 and y0,
00335         we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
00336
00337             k := 0x5fe80000 - (x0>>1);
00338             y0:= k - T2[63&(k>>14)].    ... y ~ 1/sqrt(x) to 7.8 bits
00339
00340         Here k is a 32-bit integer and T2[] is an integer array
00341         containing correction terms. Now magically the floating
00342         value of y (y's leading 32-bit word is y0, the value of
00343         its trailing word y1 is set to zero) approximates 1/sqrt(x)
00344         to almost 7.8-bit.
00345
00346         Value of T2:
00347         static int T2[64]= {
00348         0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866,
00349         0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
00350         0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
00351         0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
00352         0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
00353         0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
00354         0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
00355         0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,};
00356
00357     (2) Iterative refinement
00358
00359         Apply Reciproot iteration three times to y and multiply the
00360         result by x to get an approximation z that matches sqrt(x)
00361         to about 1 ulp. To be exact, we will have
00362                 -1ulp < sqrt(x)-z<1.0625ulp.
00363
00364         ... set rounding mode to Round-to-nearest
00365            y := y*(1.5-0.5*x*y*y)       ... almost 15 sig. bits to 1/sqrt(x)
00366            y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
00367         ... special arrangement for better accuracy
00368            z := x*y                     ... 29 bits to sqrt(x), with z*y<1
00369            z := z + 0.5*z*(1-z*y)       ... about 1 ulp to sqrt(x)
00370
00371         Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
00372         (a) the term z*y in the final iteration is always less than 1;
00373         (b) the error in the final result is biased upward so that
00374                 -1 ulp < sqrt(x) - z < 1.0625 ulp
00375             instead of |sqrt(x)-z|<1.03125ulp.
00376
00377     (3) Final adjustment
00378
00379         By twiddling y's last bit it is possible to force y to be
00380         correctly rounded according to the prevailing rounding mode
00381         as follows. Let r and i be copies of the rounding mode and
00382         inexact flag before entering the square root program. Also we
00383         use the expression y+-ulp for the next representable floating
00384         numbers (up and down) of y. Note that y+-ulp = either fixed
00385         point y+-1, or multiply y by nextafter(1,+-inf) in chopped
00386         mode.
00387
00388         R := RZ;                ... set rounding mode to round-toward-zero
00389         switch(r) {
00390             case RN:            ... round-to-nearest
00391                if(x<= z*(z-ulp)...chopped) z = z - ulp; else
00392                if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
00393                break;
00394             case RZ:case RM:    ... round-to-zero or round-to--inf
00395                R:=RP;           ... reset rounding mod to round-to-+inf
00396                if(x<z*z ... rounded up) z = z - ulp; else
00397                if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
00398                break;
00399             case RP:            ... round-to-+inf
00400                if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
00401                if(x>z*z ...chopped) z = z+ulp;
00402                break;
00403         }
00404
00405         Remark 3. The above comparisons can be done in fixed point. For
00406         example, to compare x and w=z*z chopped, it suffices to compare
00407         x1 and w1 (the trailing parts of x and w), regarding them as
00408         two's complement integers.
00409
00410         ...Is z an exact square root?
00411         To determine whether z is an exact square root of x, let z1 be the
00412         trailing part of z, and also let x0 and x1 be the leading and
00413         trailing parts of x.
00414
00415         If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0
00416             I := 1;             ... Raise Inexact flag: z is not exact
00417         else {
00418             j := 1 - [(x0>>20)&1]       ... j = logb(x) mod 2
00419             k := z1 >> 26;              ... get z's 25-th and 26-th
00420                                             fraction bits
00421             I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
00422         }
00423         R:= r           ... restore rounded mode
00424         return sqrt(x):=z.
00425
00426         If multiplication is cheaper than the foregoing red tape, the
00427         Inexact flag can be evaluated by
00428
00429             I := i;
00430             I := (z*z!=x) or I.
00431
00432         Note that z*z can overwrite I; this value must be sensed if it is
00433         True.
00434
00435         Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
00436         zero.
00437
00438                     --------------------
00439                 z1: |        f2        |
00440                     --------------------
00441                 bit 31             bit 0
00442
00443         Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
00444         or even of logb(x) have the following relations:
00445
00446         -------------------------------------------------
00447         bit 27,26 of z1         bit 1,0 of x1   logb(x)
00448         -------------------------------------------------
00449         00                      00              odd and even
00450         01                      01              even
00451         10                      10              odd
00452         10                      00              even
00453         11                      01              even
00454         -------------------------------------------------
00455
00456     (4) Special cases (see (4) of Section A).
00457
00458  */
00459
00460 #endif
00461