diff options
author | Jarno Mäkipää <jmakip87@gmail.com> | 2019-10-18 20:21:57 +0300 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2019-10-26 19:09:00 -0500 |
commit | 01f18c4c6ee68cbd58944e21d1fe36991315a889 (patch) | |
tree | 139ca0888bd373a8f48f3fbcc3b038f94ae31b60 | |
parent | b7265da4ccdfe4d256e72dc1b2a0f6b54e087ad2 (diff) | |
download | toybox-01f18c4c6ee68cbd58944e21d1fe36991315a889.tar.gz |
cut: re-enable crunch_str on cut -C
Reason: unicolumns() does not handle combining characters correctly.
Combining characters follow the character that they modify.
https://www.cl.cam.ac.uk/~mgk25/unicode.html#comb
xterm now renders the output of `cut test1.txt -C -1` correctly.
-rwxr-xr-x | tests/cut.test | 3 | ||||
-rw-r--r-- | toys/posix/cut.c | 38 |
2 files changed, 10 insertions, 31 deletions
diff --git a/tests/cut.test b/tests/cut.test index e475288a..8d8c4ba1 100755 --- a/tests/cut.test +++ b/tests/cut.test @@ -34,6 +34,9 @@ testing "-c a,b-c,d" "cut -c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" "" toyonly testing "-c japan.txt" 'cut -c 3-6,9-12 "$FILES/utf8/japan.txt"' \ "ガラスをられます\n" "" "" +toyonly testing "-C test1.txt" 'cut -C -1 "$FILES/utf8/test1.txt"' \ + "l̴̗̞̠\n" "" "" + # substitute for awk toyonly testcmd "-DF" "-DF 2,7,5" \ "said and your\nare\nis demand. supply\nforecast :\nyou you better,\n\nEm: Took hate\n" "" \ diff --git a/toys/posix/cut.c b/toys/posix/cut.c index 9f7f7458..61b2b409 100644 --- a/toys/posix/cut.c +++ b/toys/posix/cut.c @@ -46,28 +46,6 @@ GLOBALS( regex_t reg; ) -// Return number of bytes to start of first column fitting in columns -// invalid sequences are skipped/ignored -int unicolumns(char *start, unsigned columns) -{ - int i, j = 0; - wchar_t wc; - char *s = start, *ss = start; - - // Skip start, rounding down if we hit a multicolumn char - while (j<columns && (i = utf8towc(&wc, s, 4))) { - if (i<0) s++; - else { - s += i; - if (0<(i = wcwidth(wc))) { - if ((j += i)>columns) break; - ss = s; - } - } - } - - return ss-start; -} // Apply selections to an input line, producing output static void cut_line(char **pline, long len) @@ -99,15 +77,13 @@ static void cut_line(char **pline, long len) // crunch_str() currently assumes that combining characters get // escaped, to provide an unambiguous visual representation. // This assumes the input string is null terminated. - //if (start) crunch_str(&s, start, 0, 0, 0); - //if (!*s) continue; - //start = s-line; - //ss = s; - //crunch_str(&ss, count, 0, 0, 0); - //count = ss-s; - - s += unicolumns(s, start); - count = unicolumns(s, end-start); + if (start) crunch_str(&s, start, 0, 0, 0); + if (!*s) continue; + start = s-line; + ss = s; + crunch_str(&ss, count, 0, 0, 0); + count = ss-s; + } else if (toys.optflags&FLAG_c) { wchar_t wc; char *sss; |