From d2b1ba6fdee59645763e915ade3ec55e67d5839a Mon Sep 17 00:00:00 2001 From: Tomas Heinrich Date: Mon, 4 Jan 2010 16:21:31 +0100 Subject: [un]expand: unicode support function old new delta expand_main 633 663 +30 Signed-off-by: Tomas Heinrich Signed-off-by: Denys Vlasenko --- coreutils/expand.c | 51 +++++++++++++++++++++++++++++++----------------- testsuite/expand.tests | 6 ++++++ testsuite/unexpand.tests | 3 +++ 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/coreutils/expand.c b/coreutils/expand.c index 7137c3b89..e08eb1d0a 100644 --- a/coreutils/expand.c +++ b/coreutils/expand.c @@ -20,8 +20,8 @@ * * Caveat: this versions of expand and unexpand don't accept tab lists. */ - #include "libbb.h" +#include "unicode.h" enum { OPT_INITIAL = 1 << 0, @@ -30,35 +30,37 @@ enum { }; #if ENABLE_EXPAND -static void expand(FILE *file, int tab_size, unsigned opt) +static void expand(FILE *file, unsigned tab_size, unsigned opt) { char *line; - tab_size = -tab_size; - while ((line = xmalloc_fgets(file)) != NULL) { - int pos; unsigned char c; - char *ptr = line; + char *ptr; + char *ptr_strbeg; - goto start; + ptr = ptr_strbeg = line; while ((c = *ptr) != '\0') { if ((opt & OPT_INITIAL) && !isblank(c)) { - fputs(ptr, stdout); + /* not space or tab */ break; } - ptr++; if (c == '\t') { - c = ' '; - while (++pos < 0) - bb_putchar(c); - } - bb_putchar(c); - if (++pos >= 0) { - start: - pos = tab_size; + unsigned len; + *ptr = '\0'; +# if ENABLE_FEATURE_ASSUME_UNICODE + len = bb_mbstrlen(ptr_strbeg); +# else + len = ptr - ptr_strbeg; +# endif + len = tab_size - (len % tab_size); + /*while (ptr[1] == '\t') { ptr++; len += tab_size; } - can handle many tabs at once */ + printf("%s%*s", ptr_strbeg, len, ""); + ptr_strbeg = ptr + 1; } + ptr++; } + fputs(ptr_strbeg, stdout); free(line); } } @@ -75,6 +77,7 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt) while (*ptr) { unsigned n; + unsigned len; while (*ptr == ' ') { column++; @@ -97,8 +100,19 @@ static void unexpand(FILE *file, unsigned tab_size, unsigned opt) } n = strcspn(ptr, "\t "); printf("%*s%.*s", column, "", n, ptr); +# if ENABLE_FEATURE_ASSUME_UNICODE + { + char c; + c = ptr[n]; + ptr[n] = '\0'; + len = bb_mbstrlen(ptr); + ptr[n] = c; + } +# else + len = n; +# endif ptr += n; - column = (column + n) % tab_size; + column = (column + len) % tab_size; } free(line); } @@ -130,6 +144,7 @@ int expand_main(int argc UNUSED_PARAM, char **argv) "all\0" No_argument "a" ; #endif + check_unicode_in_env(); if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) { IF_FEATURE_EXPAND_LONG_OPTIONS(applet_long_options = expand_longopts); diff --git a/testsuite/expand.tests b/testsuite/expand.tests index 57498a201..996631450 100755 --- a/testsuite/expand.tests +++ b/testsuite/expand.tests @@ -12,4 +12,10 @@ testing "expand" \ "" \ "\t12345678\t12345678\n" \ +testing "expand with unicode characher 0x394" \ + "expand" \ + "Δ 12345ΔΔΔ 12345678\n" \ + "" \ + "Δ\t12345ΔΔΔ\t12345678\n" \ + exit $FAILCOUNT diff --git a/testsuite/unexpand.tests b/testsuite/unexpand.tests index 5c82a1e50..5c2a29b5f 100755 --- a/testsuite/unexpand.tests +++ b/testsuite/unexpand.tests @@ -27,4 +27,7 @@ testing "unexpand case 6" "unexpand" \ testing "unexpand case 7" "unexpand" \ "123\t 45678\n" "" "123 \t 45678\n" \ +testing "unexpand with unicode characher 0x394" "unexpand" \ + "1ΔΔΔ5\t99999\n" "" "1ΔΔΔ5 99999\n" \ + exit $FAILCOUNT -- cgit v1.2.3