POK
k_rem_pio2f.c
1 /*
2  * POK header
3  *
4  * The following file is a part of the POK project. Any modification should
5  * made according to the POK licence. You CANNOT use this file or a part of
6  * this file is this part of a file for your own project
7  *
8  * For more information on the POK licence, please see our LICENCE FILE
9  *
10  * Please follow the coding guidelines described in doc/CODING_GUIDELINES
11  *
12  * Copyright (c) 2007-2009 POK team
13  *
14  * Created by julien on Sat Jan 31 20:12:07 2009
15  */
16 
17 /* k_rem_pio2f.c -- float version of k_rem_pio2.c
18  * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
19  */
20 
21 /*
22  * ====================================================
23  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
24  *
25  * Developed at SunPro, a Sun Microsystems, Inc. business.
26  * Permission to use, copy, modify, and distribute this
27  * software is freely granted, provided that this notice
28  * is preserved.
29  * ====================================================
30  */
31 
32 #ifdef POK_NEEDS_LIBMATH
33 
34 #include <libm.h>
35 #include "math_private.h"
36 
37 /* In the float version, the input parameter x contains 8 bit
38  integers, not 24 bit integers. 113 bit precision is not supported. */
39 
40 static const int init_jk[] = {4,7,9}; /* initial value for jk */
41 
42 static const float PIo2[] = {
43  1.5703125000e+00, /* 0x3fc90000 */
44  4.5776367188e-04, /* 0x39f00000 */
45  2.5987625122e-05, /* 0x37da0000 */
46  7.5437128544e-08, /* 0x33a20000 */
47  6.0026650317e-11, /* 0x2e840000 */
48  7.3896444519e-13, /* 0x2b500000 */
49  5.3845816694e-15, /* 0x27c20000 */
50  5.6378512969e-18, /* 0x22d00000 */
51  8.3009228831e-20, /* 0x1fc40000 */
52  3.2756352257e-22, /* 0x1bc60000 */
53  6.3331015649e-25, /* 0x17440000 */
54 };
55 
56 static const float
57 zero = 0.0,
58 one = 1.0,
59 two8 = 2.5600000000e+02, /* 0x43800000 */
60 twon8 = 3.9062500000e-03; /* 0x3b800000 */
61 
62 int
63 __kernel_rem_pio2f(float *x, float *y, int e0, int nx, int prec, const int *ipio2)
64 {
65  int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
66  float z,fw,f[20],fq[20],q[20];
67 
68  /* initialize jk*/
69  jk = init_jk[prec];
70  jp = jk;
71 
72  /* determine jx,jv,q0, note that 3>q0 */
73  jx = nx-1;
74  jv = (e0-3)/8; if(jv<0) jv=0;
75  q0 = e0-8*(jv+1);
76 
77  /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
78  j = jv-jx; m = jx+jk;
79  for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (float) ipio2[j];
80 
81  /* compute q[0],q[1],...q[jk] */
82  for (i=0;i<=jk;i++) {
83  for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
84  }
85 
86  jz = jk;
87 recompute:
88  /* distill q[] into iq[] reversingly */
89  for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
90  fw = (float)((int32_t)(twon8* z));
91  iq[i] = (int32_t)(z-two8*fw);
92  z = q[j-1]+fw;
93  }
94 
95  /* compute n */
96  z = scalbnf(z,q0); /* actual value of z */
97  z -= (float)8.0*floorf(z*(float)0.125); /* trim off integer >= 8 */
98  n = (int32_t) z;
99  z -= (float)n;
100  ih = 0;
101  if(q0>0) { /* need iq[jz-1] to determine n */
102  i = (iq[jz-1]>>(8-q0)); n += i;
103  iq[jz-1] -= i<<(8-q0);
104  ih = iq[jz-1]>>(7-q0);
105  }
106  else if(q0==0) ih = iq[jz-1]>>8;
107  else if(z>=(float)0.5) ih=2;
108 
109  if(ih>0) { /* q > 0.5 */
110  n += 1; carry = 0;
111  for(i=0;i<jz ;i++) { /* compute 1-q */
112  j = iq[i];
113  if(carry==0) {
114  if(j!=0) {
115  carry = 1; iq[i] = 0x100- j;
116  }
117  } else iq[i] = 0xff - j;
118  }
119  if(q0>0) { /* rare case: chance is 1 in 12 */
120  switch(q0) {
121  case 1:
122  iq[jz-1] &= 0x7f; break;
123  case 2:
124  iq[jz-1] &= 0x3f; break;
125  }
126  }
127  if(ih==2) {
128  z = one - z;
129  if(carry!=0) z -= scalbnf(one,q0);
130  }
131  }
132 
133  /* check if recomputation is needed */
134  if(z==zero) {
135  j = 0;
136  for (i=jz-1;i>=jk;i--) j |= iq[i];
137  if(j==0) { /* need recomputation */
138  for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
139 
140  for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
141  f[jx+i] = (float) ipio2[jv+i];
142  for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
143  q[i] = fw;
144  }
145  jz += k;
146  goto recompute;
147  }
148  }
149 
150  /* chop off zero terms */
151  if(z==(float)0.0) {
152  jz -= 1; q0 -= 8;
153  while(iq[jz]==0) { jz--; q0-=8;}
154  } else { /* break z into 8-bit if necessary */
155  z = scalbnf(z,-q0);
156  if(z>=two8) {
157  fw = (float)((int32_t)(twon8*z));
158  iq[jz] = (int32_t)(z-two8*fw);
159  jz += 1; q0 += 8;
160  iq[jz] = (int32_t) fw;
161  } else iq[jz] = (int32_t) z ;
162  }
163 
164  /* convert integer "bit" chunk to floating-point value */
165  fw = scalbnf(one,q0);
166  for(i=jz;i>=0;i--) {
167  q[i] = fw*(float)iq[i]; fw*=twon8;
168  }
169 
170  /* compute PIo2[0,...,jp]*q[jz,...,0] */
171  for(i=jz;i>=0;i--) {
172  for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
173  fq[jz-i] = fw;
174  }
175 
176  /* compress fq[] into y[] */
177  switch(prec) {
178  case 0:
179  fw = 0.0;
180  for (i=jz;i>=0;i--) fw += fq[i];
181  y[0] = (ih==0)? fw: -fw;
182  break;
183  case 1:
184  case 2:
185  fw = 0.0;
186  for (i=jz;i>=0;i--) fw += fq[i];
187  y[0] = (ih==0)? fw: -fw;
188  fw = fq[0]-fw;
189  for (i=1;i<=jz;i++) fw += fq[i];
190  y[1] = (ih==0)? fw: -fw;
191  break;
192  case 3: /* painful */
193  for (i=jz;i>0;i--) {
194  fw = fq[i-1]+fq[i];
195  fq[i] += fq[i-1]-fw;
196  fq[i-1] = fw;
197  }
198  for (i=jz;i>1;i--) {
199  fw = fq[i-1]+fq[i];
200  fq[i] += fq[i-1]-fw;
201  fq[i-1] = fw;
202  }
203  for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
204  if(ih==0) {
205  y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
206  } else {
207  y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
208  }
209  }
210  return n&7;
211 }
212 
213 #endif