POK
|
00001 /* 00002 * POK header 00003 * 00004 * The following file is a part of the POK project. Any modification should 00005 * made according to the POK licence. You CANNOT use this file or a part of 00006 * this file is this part of a file for your own project 00007 * 00008 * For more information on the POK licence, please see our LICENCE FILE 00009 * 00010 * Please follow the coding guidelines described in doc/CODING_GUIDELINES 00011 * 00012 * Copyright (c) 2007-2009 POK team 00013 * 00014 * Created by julien on Fri Jan 30 13:44:27 2009 00015 */ 00016 00017 /* @(#)e_sqrt.c 5.1 93/09/24 */ 00018 /* 00019 * ==================================================== 00020 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 00021 * 00022 * Developed at SunPro, a Sun Microsystems, Inc. business. 00023 * Permission to use, copy, modify, and distribute this 00024 * software is freely granted, provided that this notice 00025 * is preserved. 00026 * ==================================================== 00027 */ 00028 00029 /* __ieee754_sqrt(x) 00030 * Return correctly rounded sqrt. 00031 * ------------------------------------------ 00032 * | Use the hardware sqrt if you have one | 00033 * ------------------------------------------ 00034 * Method: 00035 * Bit by bit method using integer arithmetic. (Slow, but portable) 00036 * 1. Normalization 00037 * Scale x to y in [1,4) with even powers of 2: 00038 * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then 00039 * sqrt(x) = 2^k * sqrt(y) 00040 * 2. Bit by bit computation 00041 * Let q = sqrt(y) truncated to i bit after binary point (q = 1), 00042 * i 0 00043 * i+1 2 00044 * s = 2*q , and y = 2 * ( y - q ). (1) 00045 * i i i i 00046 * 00047 * To compute q from q , one checks whether 00048 * i+1 i 00049 * 00050 * -(i+1) 2 00051 * (q + 2 ) <= y. (2) 00052 * i 00053 * -(i+1) 00054 * If (2) is false, then q = q ; otherwise q = q + 2 . 00055 * i+1 i i+1 i 00056 * 00057 * With some algebric manipulation, it is not difficult to see 00058 * that (2) is equivalent to 00059 * -(i+1) 00060 * s + 2 <= y (3) 00061 * i i 00062 * 00063 * The advantage of (3) is that s and y can be computed by 00064 * i i 00065 * the following recurrence formula: 00066 * if (3) is false 00067 * 00068 * s = s , y = y ; (4) 00069 * i+1 i i+1 i 00070 * 00071 * otherwise, 00072 * -i -(i+1) 00073 * s = s + 2 , y = y - s - 2 (5) 00074 * i+1 i i+1 i i 00075 * 00076 * One may easily use induction to prove (4) and (5). 00077 * Note. Since the left hand side of (3) contain only i+2 bits, 00078 * it does not necessary to do a full (53-bit) comparison 00079 * in (3). 00080 * 3. Final rounding 00081 * After generating the 53 bits result, we compute one more bit. 00082 * Together with the remainder, we can decide whether the 00083 * result is exact, bigger than 1/2ulp, or less than 1/2ulp 00084 * (it will never equal to 1/2ulp). 00085 * The rounding mode can be detected by checking whether 00086 * huge + tiny is equal to huge, and whether huge - tiny is 00087 * equal to huge for some floating point number "huge" and "tiny". 00088 * 00089 * Special cases: 00090 * sqrt(+-0) = +-0 ... exact 00091 * sqrt(inf) = inf 00092 * sqrt(-ve) = NaN ... with invalid signal 00093 * sqrt(NaN) = NaN ... with invalid signal for signaling NaN 00094 * 00095 * Other methods : see the appended file at the end of the program below. 00096 *--------------- 00097 */ 00098 00099 #ifdef POK_NEEDS_LIBMATH 00100 00101 #include <types.h> 00102 #include "math_private.h" 00103 00104 static const double one = 1.0, tiny=1.0e-300; 00105 00106 double 00107 __ieee754_sqrt(double x) 00108 { 00109 double z; 00110 int32_t sign = (int)0x80000000; 00111 int32_t ix0,s0,q,m,t,i; 00112 uint32_t r,t1,s1,ix1,q1; 00113 00114 EXTRACT_WORDS(ix0,ix1,x); 00115 00116 /* take care of Inf and NaN */ 00117 if((ix0&0x7ff00000)==0x7ff00000) { 00118 return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf 00119 sqrt(-inf)=sNaN */ 00120 } 00121 /* take care of zero */ 00122 if(ix0<=0) { 00123 if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ 00124 else if(ix0<0) 00125 return (x-x)/(x-x); /* sqrt(-ve) = sNaN */ 00126 } 00127 /* normalize x */ 00128 m = (ix0>>20); 00129 if(m==0) { /* subnormal x */ 00130 while(ix0==0) { 00131 m -= 21; 00132 ix0 |= (ix1>>11); ix1 <<= 21; 00133 } 00134 for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; 00135 m -= i-1; 00136 ix0 |= (ix1>>(32-i)); 00137 ix1 <<= i; 00138 } 00139 m -= 1023; /* unbias exponent */ 00140 ix0 = (ix0&0x000fffff)|0x00100000; 00141 if(m&1){ /* odd m, double x to make it even */ 00142 ix0 += ix0 + ((ix1&sign)>>31); 00143 ix1 += ix1; 00144 } 00145 m >>= 1; /* m = [m/2] */ 00146 00147 /* generate sqrt(x) bit by bit */ 00148 ix0 += ix0 + ((ix1&sign)>>31); 00149 ix1 += ix1; 00150 q = q1 = s0 = s1 = 0; /* [q,q1] = sqrt(x) */ 00151 r = 0x00200000; /* r = moving bit from right to left */ 00152 00153 while(r!=0) { 00154 t = s0+r; 00155 if(t<=ix0) { 00156 s0 = t+r; 00157 ix0 -= t; 00158 q += r; 00159 } 00160 ix0 += ix0 + ((ix1&sign)>>31); 00161 ix1 += ix1; 00162 r>>=1; 00163 } 00164 00165 r = sign; 00166 while(r!=0) { 00167 t1 = s1+r; 00168 t = s0; 00169 if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 00170 s1 = t1+r; 00171 if(((t1&sign)==(uint32_t)sign)&&(s1&sign)==0) s0 += 1; 00172 ix0 -= t; 00173 if (ix1 < t1) ix0 -= 1; 00174 ix1 -= t1; 00175 q1 += r; 00176 } 00177 ix0 += ix0 + ((ix1&sign)>>31); 00178 ix1 += ix1; 00179 r>>=1; 00180 } 00181 00182 /* use floating add to find out rounding direction */ 00183 if((ix0|ix1)!=0) { 00184 z = one-tiny; /* trigger inexact flag */ 00185 if (z>=one) { 00186 z = one+tiny; 00187 if (q1==(uint32_t)0xffffffff) { q1=0; q += 1;} 00188 else if (z>one) { 00189 if (q1==(uint32_t)0xfffffffe) q+=1; 00190 q1+=2; 00191 } else 00192 q1 += (q1&1); 00193 } 00194 } 00195 ix0 = (q>>1)+0x3fe00000; 00196 ix1 = q1>>1; 00197 if ((q&1)==1) ix1 |= sign; 00198 ix0 += (m <<20); 00199 INSERT_WORDS(z,ix0,ix1); 00200 return z; 00201 } 00202 00203 /* 00204 Other methods (use floating-point arithmetic) 00205 ------------- 00206 (This is a copy of a drafted paper by Prof W. Kahan 00207 and K.C. Ng, written in May, 1986) 00208 00209 Two algorithms are given here to implement sqrt(x) 00210 (IEEE double precision arithmetic) in software. 00211 Both supply sqrt(x) correctly rounded. The first algorithm (in 00212 Section A) uses newton iterations and involves four divisions. 00213 The second one uses reciproot iterations to avoid division, but 00214 requires more multiplications. Both algorithms need the ability 00215 to chop results of arithmetic operations instead of round them, 00216 and the INEXACT flag to indicate when an arithmetic operation 00217 is executed exactly with no roundoff error, all part of the 00218 standard (IEEE 754-1985). The ability to perform shift, add, 00219 subtract and logical AND operations upon 32-bit words is needed 00220 too, though not part of the standard. 00221 00222 A. sqrt(x) by Newton Iteration 00223 00224 (1) Initial approximation 00225 00226 Let x0 and x1 be the leading and the trailing 32-bit words of 00227 a floating point number x (in IEEE double format) respectively 00228 00229 1 11 52 ...widths 00230 ------------------------------------------------------ 00231 x: |s| e | f | 00232 ------------------------------------------------------ 00233 msb lsb msb lsb ...order 00234 00235 00236 ------------------------ ------------------------ 00237 x0: |s| e | f1 | x1: | f2 | 00238 ------------------------ ------------------------ 00239 00240 By performing shifts and subtracts on x0 and x1 (both regarded 00241 as integers), we obtain an 8-bit approximation of sqrt(x) as 00242 follows. 00243 00244 k := (x0>>1) + 0x1ff80000; 00245 y0 := k - T1[31&(k>>15)]. ... y ~ sqrt(x) to 8 bits 00246 Here k is a 32-bit integer and T1[] is an integer array containing 00247 correction terms. Now magically the floating value of y (y's 00248 leading 32-bit word is y0, the value of its trailing word is 0) 00249 approximates sqrt(x) to almost 8-bit. 00250 00251 Value of T1: 00252 static int T1[32]= { 00253 0, 1024, 3062, 5746, 9193, 13348, 18162, 23592, 00254 29598, 36145, 43202, 50740, 58733, 67158, 75992, 85215, 00255 83599, 71378, 60428, 50647, 41945, 34246, 27478, 21581, 00256 16499, 12183, 8588, 5674, 3403, 1742, 661, 130,}; 00257 00258 (2) Iterative refinement 00259 00260 Apply Heron's rule three times to y, we have y approximates 00261 sqrt(x) to within 1 ulp (Unit in the Last Place): 00262 00263 y := (y+x/y)/2 ... almost 17 sig. bits 00264 y := (y+x/y)/2 ... almost 35 sig. bits 00265 y := y-(y-x/y)/2 ... within 1 ulp 00266 00267 00268 Remark 1. 00269 Another way to improve y to within 1 ulp is: 00270 00271 y := (y+x/y) ... almost 17 sig. bits to 2*sqrt(x) 00272 y := y - 0x00100006 ... almost 18 sig. bits to sqrt(x) 00273 00274 2 00275 (x-y )*y 00276 y := y + 2* ---------- ...within 1 ulp 00277 2 00278 3y + x 00279 00280 00281 This formula has one division fewer than the one above; however, 00282 it requires more multiplications and additions. Also x must be 00283 scaled in advance to avoid spurious overflow in evaluating the 00284 expression 3y*y+x. Hence it is not recommended uless division 00285 is slow. If division is very slow, then one should use the 00286 reciproot algorithm given in section B. 00287 00288 (3) Final adjustment 00289 00290 By twiddling y's last bit it is possible to force y to be 00291 correctly rounded according to the prevailing rounding mode 00292 as follows. Let r and i be copies of the rounding mode and 00293 inexact flag before entering the square root program. Also we 00294 use the expression y+-ulp for the next representable floating 00295 numbers (up and down) of y. Note that y+-ulp = either fixed 00296 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 00297 mode. 00298 00299 I := FALSE; ... reset INEXACT flag I 00300 R := RZ; ... set rounding mode to round-toward-zero 00301 z := x/y; ... chopped quotient, possibly inexact 00302 If(not I) then { ... if the quotient is exact 00303 if(z=y) { 00304 I := i; ... restore inexact flag 00305 R := r; ... restore rounded mode 00306 return sqrt(x):=y. 00307 } else { 00308 z := z - ulp; ... special rounding 00309 } 00310 } 00311 i := TRUE; ... sqrt(x) is inexact 00312 If (r=RN) then z=z+ulp ... rounded-to-nearest 00313 If (r=RP) then { ... round-toward-+inf 00314 y = y+ulp; z=z+ulp; 00315 } 00316 y := y+z; ... chopped sum 00317 y0:=y0-0x00100000; ... y := y/2 is correctly rounded. 00318 I := i; ... restore inexact flag 00319 R := r; ... restore rounded mode 00320 return sqrt(x):=y. 00321 00322 (4) Special cases 00323 00324 Square root of +inf, +-0, or NaN is itself; 00325 Square root of a negative number is NaN with invalid signal. 00326 00327 00328 B. sqrt(x) by Reciproot Iteration 00329 00330 (1) Initial approximation 00331 00332 Let x0 and x1 be the leading and the trailing 32-bit words of 00333 a floating point number x (in IEEE double format) respectively 00334 (see section A). By performing shifs and subtracts on x0 and y0, 00335 we obtain a 7.8-bit approximation of 1/sqrt(x) as follows. 00336 00337 k := 0x5fe80000 - (x0>>1); 00338 y0:= k - T2[63&(k>>14)]. ... y ~ 1/sqrt(x) to 7.8 bits 00339 00340 Here k is a 32-bit integer and T2[] is an integer array 00341 containing correction terms. Now magically the floating 00342 value of y (y's leading 32-bit word is y0, the value of 00343 its trailing word y1 is set to zero) approximates 1/sqrt(x) 00344 to almost 7.8-bit. 00345 00346 Value of T2: 00347 static int T2[64]= { 00348 0x1500, 0x2ef8, 0x4d67, 0x6b02, 0x87be, 0xa395, 0xbe7a, 0xd866, 00349 0xf14a, 0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f, 00350 0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d, 00351 0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0, 00352 0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989, 00353 0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd, 00354 0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e, 00355 0x1527f,0x1334a,0x11051,0xe951, 0xbe01, 0x8e0d, 0x5924, 0x1edd,}; 00356 00357 (2) Iterative refinement 00358 00359 Apply Reciproot iteration three times to y and multiply the 00360 result by x to get an approximation z that matches sqrt(x) 00361 to about 1 ulp. To be exact, we will have 00362 -1ulp < sqrt(x)-z<1.0625ulp. 00363 00364 ... set rounding mode to Round-to-nearest 00365 y := y*(1.5-0.5*x*y*y) ... almost 15 sig. bits to 1/sqrt(x) 00366 y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x) 00367 ... special arrangement for better accuracy 00368 z := x*y ... 29 bits to sqrt(x), with z*y<1 00369 z := z + 0.5*z*(1-z*y) ... about 1 ulp to sqrt(x) 00370 00371 Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that 00372 (a) the term z*y in the final iteration is always less than 1; 00373 (b) the error in the final result is biased upward so that 00374 -1 ulp < sqrt(x) - z < 1.0625 ulp 00375 instead of |sqrt(x)-z|<1.03125ulp. 00376 00377 (3) Final adjustment 00378 00379 By twiddling y's last bit it is possible to force y to be 00380 correctly rounded according to the prevailing rounding mode 00381 as follows. Let r and i be copies of the rounding mode and 00382 inexact flag before entering the square root program. Also we 00383 use the expression y+-ulp for the next representable floating 00384 numbers (up and down) of y. Note that y+-ulp = either fixed 00385 point y+-1, or multiply y by nextafter(1,+-inf) in chopped 00386 mode. 00387 00388 R := RZ; ... set rounding mode to round-toward-zero 00389 switch(r) { 00390 case RN: ... round-to-nearest 00391 if(x<= z*(z-ulp)...chopped) z = z - ulp; else 00392 if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp; 00393 break; 00394 case RZ:case RM: ... round-to-zero or round-to--inf 00395 R:=RP; ... reset rounding mod to round-to-+inf 00396 if(x<z*z ... rounded up) z = z - ulp; else 00397 if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp; 00398 break; 00399 case RP: ... round-to-+inf 00400 if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else 00401 if(x>z*z ...chopped) z = z+ulp; 00402 break; 00403 } 00404 00405 Remark 3. The above comparisons can be done in fixed point. For 00406 example, to compare x and w=z*z chopped, it suffices to compare 00407 x1 and w1 (the trailing parts of x and w), regarding them as 00408 two's complement integers. 00409 00410 ...Is z an exact square root? 00411 To determine whether z is an exact square root of x, let z1 be the 00412 trailing part of z, and also let x0 and x1 be the leading and 00413 trailing parts of x. 00414 00415 If ((z1&0x03ffffff)!=0) ... not exact if trailing 26 bits of z!=0 00416 I := 1; ... Raise Inexact flag: z is not exact 00417 else { 00418 j := 1 - [(x0>>20)&1] ... j = logb(x) mod 2 00419 k := z1 >> 26; ... get z's 25-th and 26-th 00420 fraction bits 00421 I := i or (k&j) or ((k&(j+j+1))!=(x1&3)); 00422 } 00423 R:= r ... restore rounded mode 00424 return sqrt(x):=z. 00425 00426 If multiplication is cheaper than the foregoing red tape, the 00427 Inexact flag can be evaluated by 00428 00429 I := i; 00430 I := (z*z!=x) or I. 00431 00432 Note that z*z can overwrite I; this value must be sensed if it is 00433 True. 00434 00435 Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be 00436 zero. 00437 00438 -------------------- 00439 z1: | f2 | 00440 -------------------- 00441 bit 31 bit 0 00442 00443 Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd 00444 or even of logb(x) have the following relations: 00445 00446 ------------------------------------------------- 00447 bit 27,26 of z1 bit 1,0 of x1 logb(x) 00448 ------------------------------------------------- 00449 00 00 odd and even 00450 01 01 even 00451 10 10 odd 00452 10 00 even 00453 11 01 even 00454 ------------------------------------------------- 00455 00456 (4) Special cases (see (4) of Section A). 00457 00458 */ 00459 00460 #endif 00461