From 9f93d621925966c22ee51fdcb5def8e131596f9b Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 24 Jan 2010 07:44:03 +0100 Subject: libbb: better unicode width support. Hopefully fixes bug 839. Also opens up a possibility to make other unicode stuff smaller and more correct later. but: function old new delta static.combining - 516 +516 bb_wcwidth - 328 +328 unicode_cut_nchars - 141 +141 mbstowc_internal - 93 +93 in_table - 78 +78 cal_main 899 961 +62 static.combining0x10000 - 40 +40 unicode_strlen - 31 +31 bb_mbstrlen 31 - -31 bb_mbstowcs 173 102 -71 ------------------------------------------------------------------------------ (add/remove: 7/1 grow/shrink: 1/1 up/down: 1289/-102) Total: 1187 bytes Uses code of Markus Kuhn, which is in public domain: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c "Permission to use, copy, modify, and distribute this software for any purpose and without fee is hereby granted. The author disclaims all warranties with regard to this software." Signed-off-by: Denys Vlasenko --- coreutils/cal.c | 28 ++++++++++++++++++++++------ coreutils/df.c | 2 +- coreutils/expand.c | 4 ++-- coreutils/ls.c | 6 +++--- 4 files changed, 28 insertions(+), 12 deletions(-) (limited to 'coreutils') diff --git a/coreutils/cal.c b/coreutils/cal.c index e6f9af937..5ecb9131d 100644 --- a/coreutils/cal.c +++ b/coreutils/cal.c @@ -16,8 +16,8 @@ * * Major size reduction... over 50% (>1.5k) on i386. */ - #include "libbb.h" +#include "unicode.h" /* We often use "unsigned" intead of "int", it's easier to div on most CPUs */ @@ -83,9 +83,16 @@ int cal_main(int argc UNUSED_PARAM, char **argv) time_t now; unsigned month, year, flags, i; char *month_names[12]; - char day_headings[28]; /* 28 for julian, 21 for nonjulian */ + /* normal heading: */ + /* "Su Mo Tu We Th Fr Sa" */ + /* -j heading: */ + /* " Su Mo Tu We Th Fr Sa" */ + char day_headings[ENABLE_FEATURE_ASSUME_UNICODE ? 28 * 6 : 28]; + IF_FEATURE_ASSUME_UNICODE(char *hp = day_headings;) char buf[40]; + init_unicode(); + flags = getopt32(argv, "jy"); /* This sets julian = flags & 1: */ option_mask32 &= 1; @@ -122,15 +129,24 @@ int cal_main(int argc UNUSED_PARAM, char **argv) if (i < 7) { zero_tm.tm_wday = i; -//FIXME: unicode -//Bug 839: -//testcase with doublewidth Japanese chars: "LANG=zh_TW.utf8 cal" -//perhaps use wc[s]width() to probe terminal width /* abbreviated weekday name according to locale */ strftime(buf, sizeof(buf), "%a", &zero_tm); +#if ENABLE_FEATURE_ASSUME_UNICODE + if (julian) + *hp++ = ' '; + { + char *two_wchars = unicode_cut_nchars(2, buf); + strcpy(hp, two_wchars); + free(two_wchars); + } + hp += strlen(hp); + *hp++ = ' '; +#else strncpy(day_headings + i * (3+julian) + julian, buf, 2); +#endif } } while (++i < 12); + IF_FEATURE_ASSUME_UNICODE(hp[-1] = '\0';) if (month) { unsigned row, len, days[MAXDAYS]; diff --git a/coreutils/df.c b/coreutils/df.c index bcde78393..ae68f0831 100644 --- a/coreutils/df.c +++ b/coreutils/df.c @@ -178,7 +178,7 @@ int df_main(int argc UNUSED_PARAM, char **argv) #endif #if ENABLE_FEATURE_ASSUME_UNICODE - dev_len = bb_mbstrlen(device); + dev_len = unicode_strlen(device); if (dev_len > 20) { printf("%s\n%20s", device, ""); } else { diff --git a/coreutils/expand.c b/coreutils/expand.c index 649b4c175..2f6a708b5 100644 --- a/coreutils/expand.c +++ b/coreutils/expand.c @@ -49,7 +49,7 @@ static void expand(FILE *file, unsigned tab_size, unsigned opt) unsigned len; *ptr = '\0'; # if ENABLE_FEATURE_ASSUME_UNICODE - len = bb_mbstrlen(ptr_strbeg); + len = unicode_strlen(ptr_strbeg); # else len = ptr - ptr_strbeg; # endif @@ -105,7 +105,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt) char c; c = ptr[n]; ptr[n] = '\0'; - len = bb_mbstrlen(ptr); + len = unicode_strlen(ptr); ptr[n] = c; } # else diff --git a/coreutils/ls.c b/coreutils/ls.c index b8da1adbb..e7544474b 100644 --- a/coreutils/ls.c +++ b/coreutils/ls.c @@ -550,7 +550,7 @@ static void showfiles(struct dnode **dn, unsigned nfiles) } else { /* find the longest file name, use that as the column width */ for (i = 0; dn[i]; i++) { - int len = bb_mbstrlen(dn[i]->name); + int len = unicode_strlen(dn[i]->name); if (column_width < len) column_width = len; } @@ -742,7 +742,7 @@ static int print_name(const char *name) { if (option_mask32 & OPT_Q) { #if ENABLE_FEATURE_ASSUME_UNICODE - unsigned len = 2 + bb_mbstrlen(name); + unsigned len = 2 + unicode_strlen(name); #else unsigned len = 2; #endif @@ -762,7 +762,7 @@ static int print_name(const char *name) /* No -Q: */ #if ENABLE_FEATURE_ASSUME_UNICODE fputs(name, stdout); - return bb_mbstrlen(name); + return unicode_strlen(name); #else return printf("%s", name); #endif -- cgit v1.2.3