From 6a9154b6f649341870bc06e896d2fe7235a4aef9 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Sun, 14 Oct 2007 07:49:48 +0000 Subject: bzip2: eliminate some divisions --- archival/bz/blocksort.c | 64 ++++++++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 27 deletions(-) (limited to 'archival/bz/blocksort.c') diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c index ec8a2a56b..aaed883de 100644 --- a/archival/bz/blocksort.c +++ b/archival/bz/blocksort.c @@ -246,7 +246,12 @@ void fallbackSort(uint32_t* fmap, for (i = 0; i < 257; i++) ftab[i] = 0; for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; - for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; + + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i < 257; i++) { + j += ftab[i]; + ftab[i] = j; + } for (i = 0; i < nblock; i++) { j = eclass8[i]; @@ -255,7 +260,7 @@ void fallbackSort(uint32_t* fmap, fmap[k] = i; } - nBhtab = 2 + (nblock / 32); + nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ for (i = 0; i < nBhtab; i++) bhtab[i] = 0; for (i = 0; i < 256; i++) SET_BH(ftab[i]); @@ -737,27 +742,27 @@ void mainSort(uint32_t* ptr, memset(ftab, 0, 65537 * sizeof(ftab[0])); j = block[0] << 8; - i = nblock-1; + i = nblock - 1; /* 3%, +300 bytes */ #if CONFIG_BZIP2_FEATURE_SPEED >= 2 for (; i >= 3; i -= 4) { quadrant[i] = 0; - j = (j >> 8) |(((uint16_t)block[i]) << 8); + j = (j >> 8) | (((uint16_t)block[i]) << 8); ftab[j]++; quadrant[i-1] = 0; - j = (j >> 8) |(((uint16_t)block[i-1]) << 8); + j = (j >> 8) | (((uint16_t)block[i-1]) << 8); ftab[j]++; quadrant[i-2] = 0; - j = (j >> 8) |(((uint16_t)block[i-2]) << 8); + j = (j >> 8) | (((uint16_t)block[i-2]) << 8); ftab[j]++; quadrant[i-3] = 0; - j = (j >> 8) |(((uint16_t)block[i-3]) << 8); + j = (j >> 8) | (((uint16_t)block[i-3]) << 8); ftab[j]++; } #endif for (; i >= 0; i--) { quadrant[i] = 0; - j = (j >> 8) |(((uint16_t)block[i]) << 8); + j = (j >> 8) | (((uint16_t)block[i]) << 8); ftab[j]++; } @@ -768,34 +773,37 @@ void mainSort(uint32_t* ptr, } /*-- Complete the initial radix sort --*/ - for (i = 1; i <= 65536; i++) - ftab[i] += ftab[i-1]; + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i <= 65536; i++) { + j += ftab[i]; + ftab[i] = j; + } s = block[0] << 8; - i = nblock-1; + i = nblock - 1; #if CONFIG_BZIP2_FEATURE_SPEED >= 2 for (; i >= 3; i -= 4) { s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i; s = (s >> 8) | (block[i-1] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i-1; s = (s >> 8) | (block[i-2] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i-2; s = (s >> 8) | (block[i-3] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i-3; } #endif for (; i >= 0; i--) { s = (s >> 8) | (block[i] << 8); - j = ftab[s] -1; + j = ftab[s] - 1; ftab[s] = j; ptr[j] = i; } @@ -812,21 +820,23 @@ void mainSort(uint32_t* ptr, { int32_t vv; - /* was: int32_t h = 1; */ + /* bbox: was: int32_t h = 1; */ /* do h = 3 * h + 1; while (h <= 256); */ - int32_t h = 364; + uint32_t h = 364; do { - h = h / 3; + /*h = h / 3;*/ + h = (h * 171) >> 9; /* bbox: fast h/3 */ for (i = h; i <= 255; i++) { vv = runningOrder[i]; j = i; while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { runningOrder[j] = runningOrder[j-h]; j = j - h; - if (j <= (h - 1)) goto zero; + if (j <= (h - 1)) + goto zero; } - zero: + zero: runningOrder[j] = vv; } } while (h != 1); @@ -860,10 +870,10 @@ void mainSort(uint32_t* ptr, if (j != ss) { sb = (ss << 8) + j; if (!(ftab[sb] & SETMASK)) { - int32_t lo = ftab[sb] & CLEARMASK; + int32_t lo = ftab[sb] & CLEARMASK; int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; if (hi > lo) { - mainQSort3 ( + mainQSort3( ptr, block, quadrant, nblock, lo, hi, BZ_N_RADIX, budget ); @@ -966,15 +976,14 @@ void mainSort(uint32_t* ptr, while ((bbSize >> shifts) > 65534) shifts++; for (j = bbSize-1; j >= 0; j--) { - int32_t a2update = ptr[bbStart + j]; - uint16_t qVal = (uint16_t)(j >> shifts); + int32_t a2update = ptr[bbStart + j]; + uint16_t qVal = (uint16_t)(j >> shifts); quadrant[a2update] = qVal; if (a2update < BZ_N_OVERSHOOT) quadrant[a2update + nblock] = qVal; } AssertH(((bbSize-1) >> shifts) <= 65535, 1002); } - } } @@ -1041,7 +1050,8 @@ void BZ2_blockSort(EState* s) s->origPtr = -1; for (i = 0; i < s->nblock; i++) if (ptr[i] == 0) { - s->origPtr = i; break; + s->origPtr = i; + break; }; AssertH(s->origPtr != -1, 1003); -- cgit v1.2.3