aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2017-04-11 07:02:42 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2017-04-11 07:02:42 +0200
commit10673c44f11045a0c99b19f32930097e9b3ae148 (patch)
treee7de67d712af2eaf62af3280c0ada8e6b9945afd
parentcc1f8ba489b809d2c859276ef10094df4bcc74ea (diff)
downloadbusybox-10673c44f11045a0c99b19f32930097e9b3ae148.tar.gz
factor: much faster, and very slightly larger isqrt()
function old new delta isqrt_odd 70 88 +18 Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--coreutils/factor.c44
1 files changed, 10 insertions, 34 deletions
diff --git a/coreutils/factor.c b/coreutils/factor.c
index 11097c12d..f910fdb44 100644
--- a/coreutils/factor.c
+++ b/coreutils/factor.c
@@ -48,41 +48,17 @@ typedef unsigned long half_t;
static inline half_t isqrt(wide_t N)
{
half_t x;
+ unsigned shift;
- // Never called with N < 1
- //if (N == 0)
- // return 0;
-
- x = HALF_MAX;
- /* First approximation of x+1 > sqrt(N) - all-ones, half as many bits:
- * 1xxxxx -> 111 (six bits to three)
- * 01xxxx -> 111
- * 001xxx -> 011
- * 0001xx -> 011 and so on.
- */
- // It is actually not performance-critical at all.
- // Can simply start Newton loop with very conservative x=0xffffffff.
- //wide_t mask_2bits;
- //mask_2bits = TOPMOST_WIDE_BIT | (TOPMOST_WIDE_BIT >> 1);
- //while (!(N & mask_2bits)) {
- // x >>= 1;
- // mask_2bits >>= 2;
- //}
- dbg("x:%"HALF_FMT"x", x);
-
- for (;;) {
- half_t y = (x + N/x) / 2;
- dbg("y:%x y^2:%llx", y, (wide_t)y * y);
- /*
- * "real" y may be one bit wider: 0x100000000 and get truncated to 0.
- * In this case, "real" y is > x. The first check below is for this case:
- */
- if (y == 0 || y >= x) {
- dbg("isqrt(%llx)=%"HALF_FMT"x", N, x);
- return x;
- }
- x = y;
- }
+ shift = WIDE_BITS - 2;
+ x = 0;
+ do {
+ x = (x << 1) + 1;
+ if ((wide_t)x * x > (N >> shift))
+ x--; /* whoops, that +1 was too much */
+ shift -= 2;
+ } while ((int)shift >= 0);
+ return x;
}
static NOINLINE half_t isqrt_odd(wide_t N)