aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2015-02-02 16:07:07 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2015-02-02 16:07:07 +0100
commit f7f70bf1b3025550ea4ad8d13d977b846a868a06 (patch)
tree 2c18d81eab7e897d982a18667fdafa062a93e033
parent 7f7ade1964f61172125d9f4fe92f0b07ce8bc7a4 (diff)
download busybox-f7f70bf1b3025550ea4ad8d13d977b846a868a06.tar.gz
gzip: speed up and shrink put_16bit()
function                                             old     new   delta
put_16bit                                            104      98      -6

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- archival/gzip.c 41
-rw-r--r-- include/platform.h 2
2 files changed, 36 insertions, 7 deletions
diff --git a/archival/gzip.c b/archival/gzip.c
index 46367f9e6..18d795996 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -417,19 +417,46 @@ static void flush_outbuf(void)
#define put_8bit(c) \
do { \
G1.outbuf[G1.outcnt++] = (c); \
- if (G1.outcnt == OUTBUFSIZ) flush_outbuf(); \
+ if (G1.outcnt == OUTBUFSIZ) \
+ flush_outbuf(); \
} while (0)
/* Output a 16 bit value, lsb first */
static void put_16bit(ush w)
{
- if (G1.outcnt < OUTBUFSIZ - 2) {
- G1.outbuf[G1.outcnt++] = w;
- G1.outbuf[G1.outcnt++] = w >> 8;
- } else {
- put_8bit(w);
- put_8bit(w >> 8);
+ /* GCC 4.2.1 won't optimize out redundant loads of G1.outcnt
+ * (probably because of fear of aliasing with G1.outbuf[]
+ * stores), do it explicitly:
+ */
+ unsigned outcnt = G1.outcnt;
+ uch *dst = &G1.outbuf[outcnt];
+
+#if BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN
+ if (outcnt < OUTBUFSIZ-2) {
+ /* Common case */
+ ush *dst16 = (void*) dst;
+ *dst16 = w; /* unaligned LSB 16-bit store */
+ G1.outcnt = outcnt + 2;
+ return;
+ }
+ *dst = (uch)w;
+ w >>= 8;
+#else
+ *dst++ = (uch)w;
+ w >>= 8;
+ if (outcnt < OUTBUFSIZ-2) {
+ /* Common case */
+ *dst = w;
+ G1.outcnt = outcnt + 2;
+ return;
}
+#endif
+
+ /* Slowpath: we will need to do flush_outbuf() */
+ G1.outcnt++;
+ if (G1.outcnt == OUTBUFSIZ)
+ flush_outbuf();
+ put_8bit(w);
}
static void put_32bit(ulg n)
diff --git a/include/platform.h b/include/platform.h
index 0b0fce182..df9594507 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -217,6 +217,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
* a lvalue. This makes it more likely to not swap them by mistake
*/
#if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
+# define BB_UNALIGNED_MEMACCESS_OK 1
# define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp))
# define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
# define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
@@ -225,6 +226,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
# define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v))
/* #elif ... - add your favorite arch today! */
#else
+# define BB_UNALIGNED_MEMACCESS_OK 0
/* performs reasonably well (gcc usually inlines memcpy here) */
# define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
# define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))