From d2f6a12763b09ecd0c627c2d86d8b86fb4b62113 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 22 Nov 2019 13:34:01 -0800 Subject: Fix iconv and tests on Mac. The Mac iconv_open(3) doesn't follow Unicode TR#22 rules for charset alias matching that bionic and glibc do (and, strictly, POSIX doesn't say you have to even though it's obviously a good idea), so we have to say exactly "UTF-8" rather than "utf8". Additionally, the 2006-era bash 3.2 on current versions of macOS (because it was the last GPLv2 bash) seems to have bugs that cause it to mangle UTF-8 input, so we can't reliably echo a UTF-8 sequence into a file. Use \x in the tests to work around this. --- tests/iconv.test | 13 ++++++++----- toys/posix/iconv.c | 8 ++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/iconv.test b/tests/iconv.test index 8fe7c7a5..d0a3cb1d 100755 --- a/tests/iconv.test +++ b/tests/iconv.test @@ -4,12 +4,15 @@ # Example characters from https://en.wikipedia.org/wiki/UTF-16: # $:U+0024 €:U+20ac 𐐷:U+10437[==U+d801,U+dc37] -echo -n "$€𐐷" > chars +# We can't simply use echo because bash 3.2 on the Mac mangles it, but toysh +# should let us go back to just this when it's available... +# echo -n "$€𐐷" > chars +echo -ne "\x24\xe2\x82\xac\xf0\x90\x90\xb7" > chars #testing "name" "command" "result" "infile" "stdin" testing "" "iconv chars | xxd -p" "24e282acf09090b7\n" "" "" -testing "-t UTF-16BE" "iconv -t utf16be chars | xxd -p" "002420acd801dc37\n" "" "" -testing "-t UTF-16LE" "iconv -t utf16le chars | xxd -p" "2400ac2001d837dc\n" "" "" -testing "-t UTF-32BE" "iconv -t utf32be chars | xxd -p" "00000024000020ac00010437\n" "" "" -testing "-t UTF-32BE" "iconv -t utf32le chars | xxd -p" "24000000ac20000037040100\n" "" "" +testing "-t UTF-16BE" "iconv -t UTF-16BE chars | xxd -p" "002420acd801dc37\n" "" "" +testing "-t UTF-16LE" "iconv -t UTF-16LE chars | xxd -p" "2400ac2001d837dc\n" "" "" +testing "-t UTF-32BE" "iconv -t UTF-32BE chars | xxd -p" "00000024000020ac00010437\n" "" "" +testing "-t UTF-32LE" "iconv -t UTF-32LE chars | xxd -p" "24000000ac20000037040100\n" "" "" diff --git a/toys/posix/iconv.c b/toys/posix/iconv.c index ce375b4a..d2721672 100644 --- a/toys/posix/iconv.c +++ b/toys/posix/iconv.c @@ -18,8 +18,8 @@ config ICONV Convert character encoding of files. -c Omit invalid chars - -f Convert from (default utf8) - -t Convert to (default utf8) + -f Convert from (default UTF-8) + -t Convert to (default UTF-8) */ #define FOR_iconv @@ -63,8 +63,8 @@ static void do_iconv(int fd, char *name) void iconv_main(void) { - if (!TT.t) TT.t = "utf8"; - if (!TT.f) TT.f = "utf8"; + if (!TT.t) TT.t = "UTF-8"; + if (!TT.f) TT.f = "UTF-8"; if ((iconv_t)-1 == (TT.ic = iconv_open(TT.t, TT.f))) perror_exit("%s/%s", TT.t, TT.f); -- cgit v1.2.3