// f161_o.h // // Copyright (C) 1998-2013 Robert P. Munafo. // // float_161 type for C++ // // This is a high-precision float type built on three IEEE 64-bit doubles. // // 20100909 Eliminate a warning. #ifndef _f161_o_ #define _f161_o_ #ifndef F161_APPROX # define F161_APPROX 1 #endif #include "f107_o.h" // "pf" stands for "primitive float"; it is the floating point type upon // which the library is built. #ifdef f161_pf # undef f161_pf #endif #define f161_pf double typedef short f161_s16; class f161_o { private: public: f161_pf c0; f161_pf c1; f161_pf c2; /* Add four pf with double-pf precision. */ // add_P4_2 // input: f53 a; f53 b; f52 c; f53 d // output: (r0, r1) = a + b + c + d // ops: 20 static f161_pf add_P4_2(f161_pf a, f161_pf b, f161_pf c, f161_pf d, f161_pf & r1) { f161_pf r0, u, v, x, y, z; u = f107_o::add_112(a, b, x); v = f107_o::add_112(c, d, y); r0 = f107_o::add_112(u, v, z); r1 = x + y + z; return r0; } friend void print_f161_a(f161_o a, int cr); friend void add_f161(f161_o a, f161_o b, f161_o &res); friend void sub_f161(f161_o a, f161_o b, f161_o &res); friend void div_f161(f161_o numer, f161_o denom, f161_o &res); friend f161_s16 cmp_f161(f161_o a, f161_o b); f161_o ( ); f161_o ( f161_pf x ); f161_o ( const f107_o & ); f161_o ( const f161_o & ); operator short ( ); operator long ( ); operator float ( ); operator double ( ); operator long double ( ); operator f107_o ( ); f161_o & operator = ( const f161_o & ); f161_o & operator = ( f161_pf ); f161_o & operator = ( const f107_o & ); friend f161_o operator + ( const f161_o &, const f161_o & ); friend f161_o operator + ( f161_pf, const f161_o & ); friend f161_o operator + ( const f161_o &, f161_pf ); friend f161_o operator - ( const f161_o &, const f161_o & ); friend f161_o operator - ( f161_pf, const f161_o & ); friend f161_o operator - ( const f161_o &, f161_pf ); friend f161_o operator * ( const f161_o &, const f161_o & ); friend f161_o operator * ( f161_pf, const f161_o & ); friend f161_o operator * ( const f161_o &, f161_pf ); friend f161_o operator / ( const f161_o &, const f161_o & ); friend f161_o operator / ( f161_pf, const f161_o & ); friend f161_o operator / ( const f161_o &, f161_pf ); friend f161_o operator - ( const f161_o & ); f161_o & operator += ( f161_o & ); f161_o & operator += ( f161_pf ); f161_o & operator -= ( f161_o & ); f161_o & operator -= ( f161_pf ); friend int operator < ( const f161_o &, const f161_o & ); friend int operator < ( f161_pf, const f161_o & ); friend int operator < ( const f161_o &, f161_pf ); friend int operator << ( const f161_o &, const f161_o & ); friend int operator << ( f161_pf, const f161_o & ); friend int operator << ( const f161_o &, f161_pf ); friend int operator > ( const f161_o &, const f161_o & ); friend int operator > ( f161_pf, const f161_o & ); friend int operator > ( const f161_o &, f161_pf ); friend int operator >> ( const f161_o &, const f161_o & ); friend int operator >> ( f161_pf, const f161_o & ); friend int operator >> ( const f161_o &, f161_pf ); friend int operator >= ( const f161_o &, const f161_o & ); friend int operator >= ( const f161_o &, f161_pf ); friend int operator >= ( f161_pf, const f161_o & ); friend int operator <= ( const f161_o &, const f161_o & ); friend int operator <= ( const f161_o &, f161_pf ); friend int operator <= ( f161_pf, const f161_o & ); friend int operator != ( const f161_o &, const f161_o & ); friend int operator != ( f161_pf, const f161_o & ); friend int operator != ( const f161_o &, f161_pf ); friend f161_o square ( const f161_o& z ); friend f161_o trunc ( const f161_o& z ); friend f161_o fabs ( const f161_o& z ); friend void f161_to_digits(f161_o x, char *s, int *expn, int *sign, int precision); }; inline f161_o::f161_o ( ): c0(0), c1(0), c2(0) { } void f161_o_init(void); // f107_o cv_f161_f107( const f161_o & ); f161_o f161_native_2x ( const f161_o & ); inline f161_o f161_native_2x( const f161_o & x ) { f161_o res; res.c0 = x.c0 * 2.0; res.c1 = x.c1 * 2.0; res.c2 = x.c2 * 2.0; return(res); } f161_o f161_native_square(f161_o a); void cfa(f161_o x, f161_s16 order, f161_o *n, f161_o *d); void pr_raw_double_161(double d, int sexp, int hm, int mant); void pr_mant_comp(double mc); f161_o f161_sqrt(f161_o x); f161_o f161_intpow(f161_o x, int p); void f161_spf(char *s1, char sign_flag, int precision, f161_o x); // Assignment operators inline f161_o & f161_o :: operator = ( f161_pf x) { c0 = x; c1 = 0; c2 = 0; return *this; } inline f161_o & f161_o :: operator = ( const f107_o & x) { c0 = x.c0; c1 = x.c1; c2 = 0; return *this; } inline f161_o & f161_o :: operator = ( const f161_o & x) { c0 = x.c0; c1 = x.c1; c2 = x.c2; return *this; } // Conversion operators: DEMOTION inline f161_o :: operator short ( ) { short result; result = (short) c0; return result; } inline f161_o :: operator long ( ) { long result; // The assumption here is that "long" is less than 107 bits. result = ((long) c0) + ((long) c1); return result; } inline f161_o :: operator float ( ) { float result; result = c0; return result; } inline f161_o :: operator double ( ) { double result; result = c0; return result; } inline f161_o :: operator long double ( ) { long double result; // The assumption here is that "long double" is less than 107 bits. result = ((long double) c0) + ((long double) c1); return result; } inline f161_o :: operator f107_o ( ) { f107_o result; result.c0 = c0; result.c1 = c1; return result; } // Conversion operators: PROMOTION inline f161_o::f161_o ( f161_pf x ): c0(x), c1(0.0), c2(0.0) { } inline f161_o::f161_o ( const f107_o & x ): c0(x.c0), c1(x.c1), c2(0.0) { } inline f161_o::f161_o ( const f161_o & x ): c0(x.c0), c1(x.c1), c2(x.c2) { } // -------------- ADDITION and SUBTRACTION PRIMITIVES ---------------- /* add_1113 is called THREE-SUM in the QD paper. Add 3 pf's, returning the 3 components of a normalized td_real */ inline f161_pf add_1113(f161_pf x, f161_pf y, f161_pf z, f161_pf &r1, f161_pf &r2) { f161_pf u, v, w, r0; u = f107_o::add_112(x, y, v); r0 = f107_o::add_112(u, z, w); r1 = f107_o::add_112(v, w, r2); return r0; } /* add_1111 is the THREE-SUM that returns only one component. Adds 3 pf's, returning only the highest component of the answer. */ inline f161_pf add_1111(f161_pf x, f161_pf y, f161_pf z) { return x + y + z; } /* This is a modified version of RENORMALIZE that reduces 4 components to 3. The pseudo-code, with the outer loop expanded, is: (s, t3) = normalize_22(a2, a3) (s, t2) = normalize_22(a1, s) (t0, t1) = normalize_22(a0, s) s = t0 k = 0 for i=1 (s,e) = normalize_22(s,t1) if e != 0 b[k] = s s = e k=k+1 end if for i=2 (s,e) = normalize_22(s,t2) if e != 0 b[k] = s s = e k=k+1 end if for i=3 (s,e) = normalize_22(s,t3) if e != 0 b[k] = s s = e k=k+1 end if end for return(b[0], b[1], b[2]) Here is the first working version, before I eliminated the unnecessary statements: inline f161_pf normalize_43(f161_pf x0, f161_pf x1, f161_pf x2, f161_pf x3, f161_pf &s1, f161_pf &s2) { f161_pf s, e, t3, t2, t1; f161_pf b[3]; int k; s = normalize_22(x2, x3, t3); s = normalize_22(x1, s, t2); s = normalize_22(x0, s, t1); k = 0; b[0] = b[1] = b[2] = 0; s = normalize_22(s, t1, e); if (e != 0) { b[k] = s; s = e; k=k+1; } s = normalize_22(s, t2, e); if (e != 0) { b[k] = s; s = e; k=k+1; } s = normalize_22(s, t3, e); b[k] = s; s1 = b[1]; s2 = b[2]; return b[0]; } The line "s = normalize_22(s, t1, e);" is redundant because the input (s,t1) just came from the previous call to normalize_22. Therefore it can be simplified by replacing that statement with "e=t1;" inline f161_pf normalize_43(f161_pf x0, f161_pf x1, f161_pf x2, f161_pf x3, f161_pf &s1, f161_pf &s2) { f161_pf s, e, t3, t2, t1; f161_pf b[3]; int k; s = normalize_22(x2, x3, t3); s = normalize_22(x1, s, t2); s = normalize_22(x0, s, t1); k = 0; b[0] = b[1] = b[2] = 0; e = t1; if (e != 0) { b[k] = s; s = e; k=k+1; } s = normalize_22(s, t2, e); if (e != 0) { b[k] = s; s = e; k=k+1; } s = normalize_22(s, t3, e); b[k] = s; s1 = b[1]; s2 = b[2]; return b[0]; } */ inline f161_pf normalize_43(f161_pf x0, f161_pf x1, f161_pf x2, f161_pf x3, f161_pf &s1, f161_pf &s2) { f161_pf s, e, t3, t2, s0; s = f107_o::normalize_22(x2, x3, t3); s = f107_o::normalize_22(x1, s, t2); s = f107_o::normalize_22(x0, s, e); if (e != 0) { // cases T-* s0 = s; s = f107_o::normalize_22(e, t2, e); if (e != 0) { // case T-T s1 = s; s2 = e + t3; } else { // case T-F s1 = s + t3; s2 = 0; } } else { // cases F-* s = f107_o::normalize_22(s, t2, e); if (e != 0) { // case F-T s0 = s; s1 = e + t3; s2 = 0; } else { // case F-F s0 = s + t3; s1 = s2 = 0; } } return s0; } inline f161_pf normalize_33(f161_pf x0, f161_pf x1, f161_pf x2, f161_pf &s1, f161_pf &s2) { f161_pf s, e, t2, s0; s = f107_o::normalize_22(x1, x2, t2); s = f107_o::normalize_22(x0, s, e); if (e != 0) { s0 = s; s1 = f107_o::normalize_22(e, t2, e); s2 = e; } else { s0 = f107_o::normalize_22(s, t2, e); s1 = e; s2 = 0; } return s0; } // -------------- ADDITION and SUBTRACTION OPERATORS ---------------- inline f161_o operator + ( const f161_o & a, const f161_o & b ) { f161_o res; f161_pf t0, t1, t2, u1, v1, w2, x2, y2; #if F161_APPROX t0 = f107_o::add_112(a.c0, b.c0, u1); v1 = f107_o::add_112(a.c1, b.c1, w2); y2 = a.c2 + b.c2; t1 = f107_o::add_112(u1, v1, x2); t2 = w2 + x2 + y2; res.c0 = normalize_33(t0, t1, t2, res.c1, res.c2); #else f161_pf t3, z3, b3; t0 = f107_o::add_112(a.c0, b.c0, u1); v1 = f107_o::add_112(a.c1, b.c1, w2); y2 = f107_o::add_112(a.c2, b.c2, z3); t1 = f107_o::add_112(u1, v1, x2); t2 = add_1112(w2, x2, y2, b3); t3 = b3 + z3; res.c0 = normalize_43(t0, t1, t2, t3, res.c1, res.c2); #endif return res; } inline f161_o operator + ( const f161_o & a, f161_pf b ) { f161_o res; f161_pf x0, x1, x2, t; #if F161_APPROX x0 = f107_o::add_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = a.c2 + t; res.c0 = normalize_33(x0, x1, x2, res.c1, res.c2); #else f161_pf x3; x0 = f107_o::add_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = f107_o::add_112(a.c2, t, x3); res.c0 = normalize_43(x0, x1, x2, x3, res.c1, res.c2); #endif return res; } inline f161_o operator + ( f161_pf b, const f161_o & a ) { f161_o res; f161_pf x0, x1, x2, t; #if F161_APPROX x0 = f107_o::add_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = a.c2 + t; res.c0 = normalize_33(x0, x1, x2, res.c1, res.c2); #else f161_pf x3; x0 = f107_o::add_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = f107_o::add_112(a.c2, t, x3); res.c0 = normalize_43(x0, x1, x2, x3, res.c1, res.c2); #endif return res; } inline f161_o operator - ( const f161_o & a, const f161_o & b ) { f161_o res; f161_pf t0, t1, t2, u1, v1, w2, x2, y2; #if F161_APPROX t0 = f107_o::sub_112(a.c0, b.c0, u1); v1 = f107_o::sub_112(a.c1, b.c1, w2); y2 = a.c2 - b.c2; t1 = f107_o::add_112(u1, v1, x2); t2 = w2 + x2 + y2; res.c0 = normalize_33(t0, t1, t2, res.c1, res.c2); #else f161_pf t3, z3, b3; t0 = f107_o::sub_112(a.c0, b.c0, u1); v1 = f107_o::sub_112(a.c1, b.c1, w2); y2 = f107_o::sub_112(a.c2, b.c2, z3); t1 = f107_o::add_112(u1, v1, x2); t2 = add_1112(w2, x2, y2, b3); t3 = b3 + z3; res.c0 = normalize_43(t0, t1, t2, t3, res.c1, res.c2); #endif return res; } inline f161_o operator - ( const f161_o & a, f161_pf b ) { f161_o res; f161_pf x0, x1, x2, t; #if F161_APPROX x0 = f107_o::sub_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = a.c2 + t; res.c0 = normalize_33(x0, x1, x2, res.c1, res.c2); #else f161_pf x3; x0 = f107_o::sub_112(a.c0, b, t); x1 = f107_o::add_112(a.c1, t, t); x2 = f107_o::add_112(a.c2, t, x3); res.c0 = normalize_43(x0, x1, x2, x3, res.c1, res.c2); #endif return res; } inline f161_o operator - ( f161_pf b, const f161_o & a ) { f161_o res; f161_pf x0, x1, x2, t; #if F161_APPROX x0 = f107_o::add_112(-(a.c0), b, t); x1 = f107_o::add_112(-(a.c1), t, t); x2 = t - a.c2; res.c0 = normalize_33(x0, x1, x2, res.c1, res.c2); #else f161_pf x3; x0 = f107_o::add_112(-(a.c0), b, t); x1 = f107_o::add_112(-(a.c1), t, t); x2 = f107_o::add_112(-(a.c2), t, x3); res.c0 = normalize_43(x0, x1, x2, x3, res.c1, res.c2); #endif return res; } // ----------- comparison inline f161_s16 cmp_f161(f161_o a, f161_o b) { f161_s16 result; if (a.c0 > b.c0) { result = 1; } else if (a.c0 < b.c0) { result = -1; } else { if (a.c1 > b.c1) { result = 1; } else if (a.c1 < b.c1) { result = -1; } else { if (a.c2 > b.c2) { result = 1; } else if (a.c2 < b.c2) { result = -1; } else { result = 0; } } } return(result); } // Comparison (relational) operators inline int operator < ( const f161_o & lhs, const f161_o & rhs ) { return(cmp_f161(lhs, rhs) < 0); } inline int operator << ( const f161_o & lhs, const f161_o & rhs ) { return(lhs.c0 < rhs.c0); } inline int operator < ( const f161_o & lhs, f161_pf rhs ) { return ((lhs.c0 < rhs) || ((lhs.c0 == rhs) && (lhs.c1 < 0)) || ((lhs.c0 == rhs) && (lhs.c1 == 0) && (lhs.c2 < 0)) ); } inline int operator << ( const f161_o & lhs, f161_pf rhs ) { return(lhs.c0 < rhs); } inline int operator < ( f161_pf lhs, const f161_o & rhs ) { return ((lhs < rhs.c0) || ((lhs == rhs.c0) && (rhs.c1 > 0)) || ((lhs == rhs.c0) && (rhs.c1 == 0) && (rhs.c2 > 0))); } inline int operator << ( f161_pf lhs, const f161_o & rhs ) { return (lhs < rhs.c0); } inline int operator > ( const f161_o & lhs, const f161_o & rhs ) { return (cmp_f161(lhs, rhs) > 0); } inline int operator >> ( const f161_o & lhs, const f161_o & rhs ) { return (lhs.c0 > rhs.c0); } inline int operator > ( f161_pf lhs, const f161_o & rhs ) { return ((lhs > rhs.c0) || ((lhs == rhs.c0) && (rhs.c1 < 0)) || ((lhs == rhs.c0) && (rhs.c1 == 0) && (rhs.c2 < 0))); } inline int operator >> ( f161_pf lhs, const f161_o & rhs ) { return (lhs > rhs.c0); } inline int operator > ( const f161_o & lhs, f161_pf rhs ) { return ((lhs.c0 > rhs) || ((lhs.c0 == rhs) && (lhs.c1 > 0)) || ((lhs.c0 == rhs) && (lhs.c1 == 0) && (lhs.c2 > 0))); } inline int operator >> ( const f161_o & lhs, f161_pf rhs ) { return (lhs.c0 > rhs); } // ------------ building blocks for multiply /* Add six pf's with double-pf precision. */ inline f161_pf add_P6_2(f161_pf a, f161_pf b, f161_pf c, f161_pf d, f161_pf e, f161_pf f, f161_pf & r1) { f161_pf r0, u, v, x, y, z; u = f107_o::add_1112(a, b, c, x); v = f107_o::add_1112(d, e, f, y); r0 = f107_o::add_112(u, v, z); r1 = x + y + z; return r0; } // unary operators inline f161_o operator - ( const f161_o & lhs ) { f161_o res; res.c0 = -(lhs.c0); res.c1 = -(lhs.c1); res.c2 = -(lhs.c2); return res; } #undef f161_pf #endif /* end of f161_o.h */