From d498ff0ac474c00dd4e08939d2b3cc3da6f2cb78 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 3 Jan 2010 21:06:27 +0100 Subject: ntpd: try to avoid using libm. -1.2k if we succeed uclibc's sqrt(x) is pathetic, 411 bytes? it can be ~100... Signed-off-by: Denys Vlasenko --- networking/ntpd.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'networking/ntpd.c') diff --git a/networking/ntpd.c b/networking/ntpd.c index 08e51ef3f..f147d8c6a 100644 --- a/networking/ntpd.c +++ b/networking/ntpd.c @@ -299,7 +299,45 @@ static ALWAYS_INLINE double MIND(double a, double b) return a; return b; } -#define SQRT(x) (sqrt(x)) +static NOINLINE double my_SQRT(double X) +{ + union { + float f; + int32_t i; + } v; + double invsqrt; + double Xhalf = X * 0.5; + + /* Fast and good approximation to 1/sqrt(X), black magic */ + v.f = X; + /*v.i = 0x5f3759df - (v.i >> 1);*/ + v.i = 0x5f375a86 - (v.i >> 1); /* - this constant is slightly better */ + invsqrt = v.f; /* better than 0.2% accuracy */ + + /* Refining it using Newton's method: x1 = x0 - f(x0)/f'(x0) + * f(x) = 1/(x*x) - X (f==0 when x = 1/sqrt(X)) + * f'(x) = -2/(x*x*x) + * f(x)/f'(x) = (X - 1/(x*x)) / (2/(x*x*x)) = X*x*x*x/2 - x/2 + * x1 = x0 - (X*x0*x0*x0/2 - x0/2) = 1.5*x0 - X*x0*x0*x0/2 = x0*(1.5 - (X/2)*x0*x0) + */ + invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); /* ~0.05% accuracy */ + /* invsqrt = invsqrt * (1.5 - Xhalf * invsqrt * invsqrt); 2nd iter: ~0.0001% accuracy */ + /* With 4 iterations, more than half results will be exact, + * at 6th iterations result stabilizes with about 72% results exact. + * We are well satisfied with 0.05% accuracy. + */ + + return X * invsqrt; /* X * 1/sqrt(X) ~= sqrt(X) */ +} +static ALWAYS_INLINE double SQRT(double X) +{ + /* If this arch doesn't use IEEE 754 floats, fall back to using libm */ + if (sizeof(float) != 4) + return sqrt(X); + + /* This avoids needing libm, saves about 1.2k on x86-32 */ + return my_SQRT(X); +} static double gettime1900d(void) -- cgit v1.2.3