From c538d5bcc304d1ac99783de2337937c70a7013c7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 13 Aug 2014 09:57:44 +0200 Subject: hush: make ${#var} unicode-aware This mimics bash Signed-off-by: Denys Vlasenko --- shell/hush.c | 29 +++++++++++++++++++---------- shell/hush_test/hush-misc/unicode1.right | 3 +++ shell/hush_test/hush-misc/unicode1.tests | 13 +++++++++++++ 3 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 shell/hush_test/hush-misc/unicode1.right create mode 100755 shell/hush_test/hush-misc/unicode1.tests diff --git a/shell/hush.c b/shell/hush.c index e1d0ece29..7d3547110 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -1976,6 +1976,22 @@ static struct variable *set_vars_and_save_old(char **strings) } +/* + * Unicode helper + */ +static void reinit_unicode_for_hush(void) +{ + /* Unicode support should be activated even if LANG is set + * _during_ shell execution, not only if it was set when + * shell was started. Therefore, re-check LANG every time: + */ + const char *s = get_local_var_value("LC_ALL"); + if (!s) s = get_local_var_value("LC_CTYPE"); + if (!s) s = get_local_var_value("LANG"); + reinit_unicode(s); +} + + /* * in_str support */ @@ -2042,15 +2058,7 @@ static void get_user_input(struct in_str *i) /* Enable command line editing only while a command line * is actually being read */ do { - /* Unicode support should be activated even if LANG is set - * _during_ shell execution, not only if it was set when - * shell was started. Therefore, re-check LANG every time: - */ - const char *s = get_local_var_value("LC_ALL"); - if (!s) s = get_local_var_value("LC_CTYPE"); - if (!s) s = get_local_var_value("LANG"); - reinit_unicode(s); - + reinit_unicode_for_hush(); G.flag_SIGINT = 0; /* buglet: SIGINT will not make new prompt to appear _at once_, * only after <Enter>.
(^C will work) */ @@ -5028,8 +5036,9 @@ static NOINLINE const char *expand_one_var(char **to_be_freed_pp, char *arg, cha /* Handle any expansions */ if (exp_op == 'L') { + reinit_unicode_for_hush(); debug_printf_expand("expand: length(%s)=", val); - val = utoa(val ? strlen(val) : 0); + val = utoa(val ? unicode_strlen(val) : 0); debug_printf_expand("%s\n", val); } else if (exp_op) { if (exp_op == '%' || exp_op == '#') { diff --git a/shell/hush_test/hush-misc/unicode1.right b/shell/hush_test/hush-misc/unicode1.right new file mode 100644 index 000000000..d3bbbf697 --- /dev/null +++ b/shell/hush_test/hush-misc/unicode1.right @@ -0,0 +1,3 @@ +1 +1 +Ok diff --git a/shell/hush_test/hush-misc/unicode1.tests b/shell/hush_test/hush-misc/unicode1.tests new file mode 100755 index 000000000..8788ba910 --- /dev/null +++ b/shell/hush_test/hush-misc/unicode1.tests @@ -0,0 +1,13 @@ +LANG=en_US.UTF-8 + +# A combining character U+300 +a=`printf "\xcc\x80"` +# Should print 1 +echo ${#a} + +# A Japanese katakana character U+30a3 +a=`printf "\xe3\x82\xa3"` +# Should print 1 +echo ${#a} + +echo Ok -- cgit v1.2.3