From fda9fafe279d9394ad53313320a949c86f646734 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 5 Jul 2017 19:10:21 +0200 Subject: ash: fix matching of unicode greek letter rho (cf 81) and similar cases Signed-off-by: Denys Vlasenko --- shell/ash.c | 23 +++++++++++++++++- shell/ash_test/ash-quoting/unicode_8x_chars.right | 6 +++++ shell/ash_test/ash-quoting/unicode_8x_chars.tests | 28 ++++++++++++++++++++++ .../hush_test/hush-quoting/unicode_8x_chars.right | 6 +++++ .../hush_test/hush-quoting/unicode_8x_chars.tests | 28 ++++++++++++++++++++++ 5 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 shell/ash_test/ash-quoting/unicode_8x_chars.right create mode 100755 shell/ash_test/ash-quoting/unicode_8x_chars.tests create mode 100644 shell/hush_test/hush-quoting/unicode_8x_chars.right create mode 100755 shell/hush_test/hush-quoting/unicode_8x_chars.tests (limited to 'shell') diff --git a/shell/ash.c b/shell/ash.c index 6d46e3719..e5fdd1646 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -5913,6 +5913,7 @@ rmescapes(char *str, int flag) while (*p) { if ((unsigned char)*p == CTLQUOTEMARK) { // Note: both inquotes and protect_against_glob only affect whether +// CTLESC,<ch> gets converted to <ch> or to \<ch> inquotes = ~inquotes; p++; protect_against_glob = globbing; @@ -5925,7 +5926,27 @@ rmescapes(char *str, int flag) ash_msg_and_raise_error("CTLESC at EOL (shouldn't happen)"); #endif if (protect_against_glob) { - *q++ = '\\'; + /* + * We used to trust glob() and fnmatch() to eat + * superfluous escapes (\z where z has no + * special meaning anyway). But this causes + * bugs such as string of one greek letter rho + * (unicode-encoded as two bytes 'cf,81") + * getting encoded as "cf,CTLESC,81" + * and here, converted to "cf,\,81" - + * which does not go well with some flavors + * of fnmatch() in unicode locales. + * + * Lets add "\" only on the chars which need it. + */ + if (*p == '*' + || *p == '?' + || *p == '[' + /* || *p == ']' maybe also this? 
*/ + || *p == '\\' + ) { + *q++ = '\\'; + } } } else if (*p == '\\' && !inquotes) { /* naked back slash */ diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.right b/shell/ash_test/ash-quoting/unicode_8x_chars.right new file mode 100644 index 000000000..7780b88b4 --- /dev/null +++ b/shell/ash_test/ash-quoting/unicode_8x_chars.right @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.tests b/shell/ash_test/ash-quoting/unicode_8x_chars.tests new file mode 100755 index 000000000..1258745ec --- /dev/null +++ b/shell/ash_test/ash-quoting/unicode_8x_chars.tests @@ -0,0 +1,28 @@ +# Unicode: cf 80 +case π in +( "π" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 81 +case ρ in +( "ρ" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 82 +case ς in +( "ς" ) echo ok ;; +( * ) echo WRONG ;; +esac + +case "π" in +( π ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ρ" in +( ρ ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ς" in +( ς ) echo ok ;; +( * ) echo WRONG ;; +esac diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.right b/shell/hush_test/hush-quoting/unicode_8x_chars.right new file mode 100644 index 000000000..7780b88b4 --- /dev/null +++ b/shell/hush_test/hush-quoting/unicode_8x_chars.right @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.tests b/shell/hush_test/hush-quoting/unicode_8x_chars.tests new file mode 100755 index 000000000..1258745ec --- /dev/null +++ b/shell/hush_test/hush-quoting/unicode_8x_chars.tests @@ -0,0 +1,28 @@ +# Unicode: cf 80 +case π in +( "π" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 81 +case ρ in +( "ρ" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 82 +case ς in +( "ς" ) echo ok ;; +( * ) echo WRONG ;; +esac + +case "π" in +( π ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ρ" in +( ρ ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ς" in +( ς ) echo ok ;; +( * ) echo WRONG ;; +esac -- cgit v1.2.3