From ab801874f852312787c049272c20b14e06ed8195 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Sun, 2 Dec 2007 08:35:37 +0000 Subject: attack the biggest stack users: -mkfs_minix_main [busybox_unstripped]: 4288 -mkfs_minix_main [busybox_unstripped]: 4276 -grave [busybox_unstripped]: 4260 (bzip2 users too - not listed) price we pay in code size increase: mainSort 2458 2515 +57 grave 1005 1058 +53 sendMTFValues 2177 2195 +18 BZ2_blockSort 122 125 +3 mkfs_minix_main 3070 3022 -48 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/1 up/down: 131/-48) Total: 83 bytes --- archival/bz/blocksort.c | 21 +++++++++++++++------ archival/bz/bzlib_private.h | 19 ++++++++++++++----- archival/bz/compress.c | 10 ++++++++-- archival/bz/huffman.c | 11 ++++++++++- 4 files changed, 47 insertions(+), 14 deletions(-) (limited to 'archival/bz') diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c index cddbfcbea..0e73ffeba 100644 --- a/archival/bz/blocksort.c +++ b/archival/bz/blocksort.c @@ -721,7 +721,8 @@ void mainQSort3(uint32_t* ptr, #define CLEARMASK (~(SETMASK)) static NOINLINE -void mainSort(uint32_t* ptr, +void mainSort(EState* state, + uint32_t* ptr, uint8_t* block, uint16_t* quadrant, uint32_t* ftab, @@ -729,13 +730,18 @@ void mainSort(uint32_t* ptr, int32_t* budget) { int32_t i, j, k, ss, sb; - int32_t runningOrder[256]; - Bool bigDone[256]; - int32_t copyStart[256]; - int32_t copyEnd [256]; uint8_t c1; int32_t numQSorted; uint16_t s; + Bool bigDone[256]; + /* bbox: moved to EState to save stack + int32_t runningOrder[256]; + int32_t copyStart[256]; + int32_t copyEnd [256]; + */ +#define runningOrder (state->mainSort__runningOrder) +#define copyStart (state->mainSort__copyStart) +#define copyEnd (state->mainSort__copyEnd) /*-- set up the 2-byte frequency table --*/ /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ @@ -985,6 +991,9 @@ void mainSort(uint32_t* ptr, AssertH(((bbSize-1) >> shifts) <= 65535, 1002); } } +#undef runningOrder +#undef copyStart +#undef copyEnd } #undef BIGFREQ @@ -1041,7 +1050,7 @@ void BZ2_blockSort(EState* s) */ budget = nblock * ((wfact-1) / 3); - mainSort(ptr, block, quadrant, ftab, nblock, &budget); + mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); if (budget < 0) { fallbackSort(s->arr1, s->arr2, ftab, nblock); } diff --git a/archival/bz/bzlib_private.h b/archival/bz/bzlib_private.h index 02f177eb2..48676a391 100644 --- a/archival/bz/bzlib_private.h +++ b/archival/bz/bzlib_private.h @@ -178,13 +178,22 @@ typedef struct EState { uint8_t selector [BZ_MAX_SELECTORS]; uint8_t selectorMtf[BZ_MAX_SELECTORS]; - uint8_t len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - int32_t code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + /* stack-saving measures: these can be local, but they are too big */ + int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; #if CONFIG_BZIP2_FEATURE_SPEED >= 5 /* second dimension: only 3 needed; 4 makes index calculations faster */ - uint32_t len_pack[BZ_MAX_ALPHA_SIZE][4]; + uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; #endif + int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2]; + + int32_t mainSort__runningOrder[256]; + int32_t mainSort__copyStart[256]; + int32_t mainSort__copyEnd[256]; } EState; @@ -203,7 +212,7 @@ static void BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t); static void -BZ2_hbMakeCodeLengths(uint8_t*, int32_t*, int32_t, int32_t); +BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t); /*-------------------------------------------------------------*/ /*--- end bzlib_private.h ---*/ diff --git a/archival/bz/compress.c b/archival/bz/compress.c index b72edbbd4..640b8872b 100644 --- a/archival/bz/compress.c +++ b/archival/bz/compress.c @@ -264,13 +264,16 @@ void sendMTFValues(EState* s) * are also globals only used in this proc. * Made global to keep stack frame size small. */ +#define code sendMTFValues__code +#define rfreq sendMTFValues__rfreq +#define len_pack sendMTFValues__len_pack uint16_t cost[BZ_N_GROUPS]; int32_t fave[BZ_N_GROUPS]; uint16_t* mtfv = s->mtfv; - alphaSize = s->nInUse+2; + alphaSize = s->nInUse + 2; for (t = 0; t < BZ_N_GROUPS; t++) for (v = 0; v < alphaSize; v++) s->len[t][v] = BZ_GREATER_ICOST; @@ -453,7 +456,7 @@ void sendMTFValues(EState* s) /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See * comment in huffman.c for details. */ for (t = 0; t < nGroups; t++) - BZ2_hbMakeCodeLengths(&(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); + BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); } AssertH(nGroups < 8, 3002); @@ -602,6 +605,9 @@ void sendMTFValues(EState* s) selCtr++; } AssertH(selCtr == nSelectors, 3007); +#undef code +#undef rfreq +#undef len_pack } diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c index 02838c496..676b1af66 100644 --- a/archival/bz/huffman.c +++ b/archival/bz/huffman.c @@ -98,7 +98,8 @@ void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap) /*---------------------------------------------------*/ static -void BZ2_hbMakeCodeLengths(uint8_t *len, +void BZ2_hbMakeCodeLengths(EState *s, + uint8_t *len, int32_t *freq, int32_t alphaSize, int32_t maxLen) @@ -110,9 +111,14 @@ void BZ2_hbMakeCodeLengths(uint8_t *len, int32_t nNodes, nHeap, n1, n2, i, j, k; Bool tooLong; + /* bbox: moved to EState to save stack int32_t heap [BZ_MAX_ALPHA_SIZE + 2]; int32_t weight[BZ_MAX_ALPHA_SIZE * 2]; int32_t parent[BZ_MAX_ALPHA_SIZE * 2]; + */ +#define heap (s->BZ2_hbMakeCodeLengths__heap) +#define weight (s->BZ2_hbMakeCodeLengths__weight) +#define parent (s->BZ2_hbMakeCodeLengths__parent) for (i = 0; i < alphaSize; i++) weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; @@ -189,6 +195,9 @@ void BZ2_hbMakeCodeLengths(uint8_t *len, weight[i] = j << 8; } } +#undef heap +#undef weight +#undef parent } -- cgit v1.2.3