about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Rob Landley <rob@landley.net> 2017-12-29 16:43:38 -0600
committer Rob Landley <rob@landley.net> 2017-12-29 16:43:38 -0600
commit 78c3fcd224c83b454e80a2160ec62e8daf3be5ab (patch)
tree fa6e849b67993b6dcc697d2f37c232a66e2ff553
parent 12a0b40d09546839f67c8c216d0f20a6609716b2 (diff)
download toybox-78c3fcd224c83b454e80a2160ec62e8daf3be5ab.tar.gz
Cleanup iconv.
Implement -c. Fix the endless loop when an illegal char happens with !outleft (no need to check errno, just test in == toybuf). Refill the buffer each time (less efficient handling of illegal chars, but we never have to worry about how long a valid sequence is in an unknown encoding). Fix input longer than 2k: memmove() had src/dest switched, and the second time through the loop "in" started at offset inleft, so make it start at the beginning of the buffer every time. Switch the iconv_open() error message to show to/from and errno (rather than hardwired English text).
-rw-r--r-- toys/pending/iconv.c 54
1 files changed, 26 insertions, 28 deletions
diff --git a/toys/pending/iconv.c b/toys/pending/iconv.c
index 75ff6397..9b3c76b3 100644
--- a/toys/pending/iconv.c
+++ b/toys/pending/iconv.c
@@ -15,8 +15,9 @@ config ICONV
Convert character encoding of files.
- -f convert from (default utf8)
- -t convert to (default utf8)
+ -c Omit invalid chars
+ -f convert from (default utf8)
+ -t convert to (default utf8)
*/
#define FOR_iconv
@@ -33,42 +34,39 @@ GLOBALS(
static void do_iconv(int fd, char *name)
{
char *outstart = toybuf+2048;
- size_t inleft = 0;
- int len = 1;
+ size_t outlen, inlen = 0;
+ int readlen = 1;
- do {
- size_t outleft = 2048;
- char *in = toybuf+inleft, *out = outstart;
+ for (;;) {
+ char *in = toybuf, *out = outstart;
- len = read(fd, in, 2048-inleft);
-
- if (len < 0) {
+ if (readlen && 0>(readlen = read(fd, in+inlen, 2048-inlen))) {
perror_msg("read '%s'", name);
return;
}
- inleft += len;
+ inlen += readlen;
+ if (!inlen) break;
- do {
- if (iconv(TT.ic, &in, &inleft, &out, &outleft) == -1
- && (errno == EILSEQ || (in == toybuf+inleft-len && errno == EINVAL)))
- {
- if (outleft) {
- // Skip first byte of illegal sequence to avoid endless loops
- *(out++) = *(in++);
- inleft--;
- }
- }
- xwrite(1, outstart, out-outstart);
- // Top off input buffer
- memmove(in, toybuf, inleft);
- } while (len < 1 && inleft);
- } while (len > 0);
+ outlen = 2048;
+ iconv(TT.ic, &in, &inlen, &out, &outlen);
+ if (in == toybuf) {
+ // Skip first byte of illegal sequence to avoid endless loops
+ if (toys.optflags & FLAG_c) in++;
+ else *(out++) = *(in++);
+ inlen--;
+ }
+ if (out != outstart) xwrite(1, outstart, out-outstart);
+ memmove(toybuf, in, inlen);
+ }
}
void iconv_main(void)
{
- TT.ic = iconv_open(TT.to ? TT.to : "utf8", TT.from ? TT.from : "utf8");
- if (TT.ic == (iconv_t)-1) error_exit("bad encoding");
+ if (!TT.to) TT.to = "utf8";
+ if (!TT.from) TT.from = "utf8";
+
+ if ((iconv_t)-1 == (TT.ic = iconv_open(TT.to, TT.from)))
+ perror_exit("%s/%s", TT.to, TT.from);
loopfiles(toys.optargs, do_iconv);
if (CFG_TOYBOX_FREE) iconv_close(TT.ic);
}