about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Elliott Hughes <enh@google.com> 2019-11-22 13:34:01 -0800
committer: Rob Landley <rob@landley.net> 2019-11-23 10:03:43 -0600
commit: d2f6a12763b09ecd0c627c2d86d8b86fb4b62113 (patch)
tree: f792a16abbafdc91ec5a0ef4265e4951acafa4c4
parent: deee4f033f190e59eedc82e73b94b6b4b97d0a6f (diff)
download: toybox-d2f6a12763b09ecd0c627c2d86d8b86fb4b62113.tar.gz
Fix iconv and tests on Mac.
The Mac iconv_open(3) doesn't follow Unicode TR#22 rules for charset alias matching that bionic and glibc do (and, strictly, POSIX doesn't say you have to even though it's obviously a good idea), so we have to say exactly "UTF-8" rather than "utf8". Additionally, the 2006-era bash 3.2 on current versions of macOS (because it was the last GPLv2 bash) seems to have bugs that cause it to mangle UTF-8 input, so we can't reliably echo a UTF-8 sequence into a file. Use \x in the tests to work around this.
-rwxr-xr-x  tests/iconv.test    13
-rw-r--r--  toys/posix/iconv.c   8
2 files changed, 12 insertions, 9 deletions
diff --git a/tests/iconv.test b/tests/iconv.test
index 8fe7c7a5..d0a3cb1d 100755
--- a/tests/iconv.test
+++ b/tests/iconv.test
@@ -4,12 +4,15 @@
# Example characters from https://en.wikipedia.org/wiki/UTF-16:
# $:U+0024 €:U+20ac 𐐷:U+10437[==U+d801,U+dc37]
-echo -n "$€𐐷" > chars
+# We can't simply use echo because bash 3.2 on the Mac mangles it, but toysh
+# should let us go back to just this when it's available...
+# echo -n "$€𐐷" > chars
+echo -ne "\x24\xe2\x82\xac\xf0\x90\x90\xb7" > chars
#testing "name" "command" "result" "infile" "stdin"
testing "" "iconv chars | xxd -p" "24e282acf09090b7\n" "" ""
-testing "-t UTF-16BE" "iconv -t utf16be chars | xxd -p" "002420acd801dc37\n" "" ""
-testing "-t UTF-16LE" "iconv -t utf16le chars | xxd -p" "2400ac2001d837dc\n" "" ""
-testing "-t UTF-32BE" "iconv -t utf32be chars | xxd -p" "00000024000020ac00010437\n" "" ""
-testing "-t UTF-32BE" "iconv -t utf32le chars | xxd -p" "24000000ac20000037040100\n" "" ""
+testing "-t UTF-16BE" "iconv -t UTF-16BE chars | xxd -p" "002420acd801dc37\n" "" ""
+testing "-t UTF-16LE" "iconv -t UTF-16LE chars | xxd -p" "2400ac2001d837dc\n" "" ""
+testing "-t UTF-32BE" "iconv -t UTF-32BE chars | xxd -p" "00000024000020ac00010437\n" "" ""
+testing "-t UTF-32LE" "iconv -t UTF-32LE chars | xxd -p" "24000000ac20000037040100\n" "" ""
diff --git a/toys/posix/iconv.c b/toys/posix/iconv.c
index ce375b4a..d2721672 100644
--- a/toys/posix/iconv.c
+++ b/toys/posix/iconv.c
@@ -18,8 +18,8 @@ config ICONV
Convert character encoding of files.
-c Omit invalid chars
- -f Convert from (default utf8)
- -t Convert to (default utf8)
+ -f Convert from (default UTF-8)
+ -t Convert to (default UTF-8)
*/
#define FOR_iconv
@@ -63,8 +63,8 @@ static void do_iconv(int fd, char *name)
void iconv_main(void)
{
- if (!TT.t) TT.t = "utf8";
- if (!TT.f) TT.f = "utf8";
+ if (!TT.t) TT.t = "UTF-8";
+ if (!TT.f) TT.f = "UTF-8";
if ((iconv_t)-1 == (TT.ic = iconv_open(TT.t, TT.f)))
perror_exit("%s/%s", TT.t, TT.f);