From 28055028a709020ba7eb44f9e5037d0a952b51d6 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Mon, 4 Jan 2010 20:49:58 +0100
Subject: fold: unicode support. Based on a patch by Tomas Heinrich

General Unicode support is tweaked to expose unicode_status.

function                                     old     new   delta
init_unicode                                   -      77     +77
write2stdout                                   -      19     +19
adjust_column                                 68      71      +3
unicode_status                                 -       1      +1
unicode_is_enabled                             1       -      -1
grep_main                                    780     773      -7
fold_main                                    619     552     -67
check_unicode_in_env                          77       -     -77
------------------------------------------------------------------------------
(add/remove: 3/2 grow/shrink: 1/2 up/down: 100/-152)          Total: -52 bytes

Signed-off-by: Denys Vlasenko
---
 libbb/unicode.c | 46 +++++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 19 deletions(-)

(limited to 'libbb/unicode.c')

diff --git a/libbb/unicode.c b/libbb/unicode.c
index 544528acd..9d316df04 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -7,7 +7,9 @@
  * Licensed under GPL version 2, see file LICENSE in this tarball for details.
  */
 #include "libbb.h"
-# include "unicode.h"
+#include "unicode.h"
+
+uint8_t unicode_status;
 
 size_t FAST_FUNC bb_mbstrlen(const char *string)
 {
@@ -17,32 +19,38 @@ size_t FAST_FUNC bb_mbstrlen(const char *string)
 	return width;
 }
 
-#if !ENABLE_LOCALE_SUPPORT
+#if ENABLE_LOCALE_SUPPORT
+
+/* Unicode support using libc */
+
+void FAST_FUNC init_unicode(void)
+{
+	/* In unicode, this is a one character string */
+	static const char unicode_0x394[] = { 0xce, 0x94, 0 };
+
+	if (unicode_status != UNICODE_UNKNOWN)
+		return;
+
+	unicode_status = bb_mbstrlen(unicode_0x394) == 1 ? UNICODE_ON : UNICODE_OFF;
+}
+
+#else
 
 /* Crude "locale support" which knows only C and Unicode locales */
 
-/* unicode_is_enabled:
- * 0: not known yet,
- * 1: not unicode (IOW: assuming one char == one byte)
- * 2: unicode
- */
-# if !ENABLE_FEATURE_CHECK_UNICODE_IN_ENV
-# define unicode_is_enabled 2
-# else
-static smallint unicode_is_enabled;
-void FAST_FUNC check_unicode_in_env(void)
+# if ENABLE_FEATURE_CHECK_UNICODE_IN_ENV
+void FAST_FUNC init_unicode(void)
 {
 	char *lang;
 
-	if (unicode_is_enabled)
+	if (unicode_status != UNICODE_UNKNOWN)
 		return;
-	unicode_is_enabled = 1;
+	unicode_status = UNICODE_OFF;
 
 	lang = getenv("LANG");
 	if (!lang || !(strstr(lang, ".utf") || strstr(lang, ".UTF")))
 		return;
-
-	unicode_is_enabled = 2;
+	unicode_status = UNICODE_ON;
 }
 # endif
 
@@ -85,7 +93,7 @@ static size_t wcrtomb_internal(char *s, wchar_t wc)
 
 size_t FAST_FUNC wcrtomb(char *s, wchar_t wc, mbstate_t *ps UNUSED_PARAM)
 {
-	if (unicode_is_enabled != 2) {
+	if (unicode_status != UNICODE_ON) {
 		*s = wc;
 		return 1;
 	}
@@ -97,7 +105,7 @@ size_t FAST_FUNC wcstombs(char *dest, const wchar_t *src, size_t n)
 {
 	size_t org_n = n;
 
-	if (unicode_is_enabled != 2) {
+	if (unicode_status != UNICODE_ON) {
 		while (n) {
 			wchar_t c = *src++;
 			*dest++ = c;
@@ -137,7 +145,7 @@ size_t FAST_FUNC mbstowcs(wchar_t *dest, const char *src, size_t n)
 {
 	size_t org_n = n;
 
-	if (unicode_is_enabled != 2) {
+	if (unicode_status != UNICODE_ON) {
 		while (n) {
 			unsigned char c = *src++;
 
-- 
cgit v1.2.3