diff options
author | Elliott Hughes <enh@google.com> | 2019-11-22 13:34:01 -0800 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2019-11-23 10:03:43 -0600 |
commit | d2f6a12763b09ecd0c627c2d86d8b86fb4b62113 (patch) | |
tree | f792a16abbafdc91ec5a0ef4265e4951acafa4c4 | |
parent | deee4f033f190e59eedc82e73b94b6b4b97d0a6f (diff) | |
download | toybox-d2f6a12763b09ecd0c627c2d86d8b86fb4b62113.tar.gz |
Fix iconv and tests on Mac.
The Mac iconv_open(3) doesn't apply the Unicode TR#22 charset alias
matching rules that bionic and glibc follow (and, strictly, POSIX doesn't
require it, even though it's obviously a good idea), so we have to say
exactly "UTF-8" rather than "utf8".
Additionally, the 2006-era bash 3.2 on current versions of macOS
(because it was the last GPLv2 bash) seems to have bugs that cause
it to mangle UTF-8 input, so we can't reliably echo a UTF-8 sequence
into a file. Use \x in the tests to work around this.
-rwxr-xr-x | tests/iconv.test | 13 |
-rw-r--r-- | toys/posix/iconv.c | 8 |
2 files changed, 12 insertions, 9 deletions
diff --git a/tests/iconv.test b/tests/iconv.test
index 8fe7c7a5..d0a3cb1d 100755
--- a/tests/iconv.test
+++ b/tests/iconv.test
@@ -4,12 +4,15 @@
 
 # Example characters from https://en.wikipedia.org/wiki/UTF-16:
 # $:U+0024 €:U+20ac 𐐷:U+10437[==U+d801,U+dc37]
-echo -n "$€𐐷" > chars
+# We can't simply use echo because bash 3.2 on the Mac mangles it, but toysh
+# should let us go back to just this when it's available...
+# echo -n "$€𐐷" > chars
+echo -ne "\x24\xe2\x82\xac\xf0\x90\x90\xb7" > chars
 
 #testing "name" "command" "result" "infile" "stdin"
 testing "" "iconv chars | xxd -p" "24e282acf09090b7\n" "" ""
-testing "-t UTF-16BE" "iconv -t utf16be chars | xxd -p" "002420acd801dc37\n" "" ""
-testing "-t UTF-16LE" "iconv -t utf16le chars | xxd -p" "2400ac2001d837dc\n" "" ""
-testing "-t UTF-32BE" "iconv -t utf32be chars | xxd -p" "00000024000020ac00010437\n" "" ""
-testing "-t UTF-32BE" "iconv -t utf32le chars | xxd -p" "24000000ac20000037040100\n" "" ""
+testing "-t UTF-16BE" "iconv -t UTF-16BE chars | xxd -p" "002420acd801dc37\n" "" ""
+testing "-t UTF-16LE" "iconv -t UTF-16LE chars | xxd -p" "2400ac2001d837dc\n" "" ""
+testing "-t UTF-32BE" "iconv -t UTF-32BE chars | xxd -p" "00000024000020ac00010437\n" "" ""
+testing "-t UTF-32LE" "iconv -t UTF-32LE chars | xxd -p" "24000000ac20000037040100\n" "" ""
 
diff --git a/toys/posix/iconv.c b/toys/posix/iconv.c
index ce375b4a..d2721672 100644
--- a/toys/posix/iconv.c
+++ b/toys/posix/iconv.c
@@ -18,8 +18,8 @@ config ICONV
     Convert character encoding of files.
 
     -c Omit invalid chars
-    -f Convert from (default utf8)
-    -t Convert to (default utf8)
+    -f Convert from (default UTF-8)
+    -t Convert to (default UTF-8)
 */
 
 #define FOR_iconv
@@ -63,8 +63,8 @@ static void do_iconv(int fd, char *name)
 
 void iconv_main(void)
 {
-  if (!TT.t) TT.t = "utf8";
-  if (!TT.f) TT.f = "utf8";
+  if (!TT.t) TT.t = "UTF-8";
+  if (!TT.f) TT.f = "UTF-8";
 
   if ((iconv_t)-1 == (TT.ic = iconv_open(TT.t, TT.f)))
     perror_exit("%s/%s", TT.t, TT.f);