From 01f18c4c6ee68cbd58944e21d1fe36991315a889 Mon Sep 17 00:00:00 2001 From: Jarno Mäkipää Date: Fri, 18 Oct 2019 20:21:57 +0300 Subject: cut: re-enable crunch_str on cut -C Reason: unicolumns() does not print combining characters correctly Combining characters follow the character which they modify. https://www.cl.cam.ac.uk/~mgk25/unicode.html#comb xterm renders cut test1.txt -C -1 now correctly --- tests/cut.test | 3 +++ toys/posix/cut.c | 38 +++++++------------------------------- 2 files changed, 10 insertions(+), 31 deletions(-) diff --git a/tests/cut.test b/tests/cut.test index e475288a..8d8c4ba1 100755 --- a/tests/cut.test +++ b/tests/cut.test @@ -34,6 +34,9 @@ testing "-c a,b-c,d" "cut -c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" "" toyonly testing "-c japan.txt" 'cut -c 3-6,9-12 "$FILES/utf8/japan.txt"' \ "ガラスをられます\n" "" "" +toyonly testing "-C test1.txt" 'cut -C -1 "$FILES/utf8/test1.txt"' \ + "l̴̗̞̠\n" "" "" + # substitute for awk toyonly testcmd "-DF" "-DF 2,7,5" \ "said and your\nare\nis demand. supply\nforecast :\nyou you better,\n\nEm: Took hate\n" "" \ diff --git a/toys/posix/cut.c b/toys/posix/cut.c index 9f7f7458..61b2b409 100644 --- a/toys/posix/cut.c +++ b/toys/posix/cut.c @@ -46,28 +46,6 @@ GLOBALS( regex_t reg; ) -// Return number of bytes to start of first column fitting in columns -// invalid sequences are skipped/ignored -int unicolumns(char *start, unsigned columns) -{ - int i, j = 0; - wchar_t wc; - char *s = start, *ss = start; - - // Skip start, rounding down if we hit a multicolumn char - while (j<columns && (i = utf8towc(&wc, s, 4))) { - if (i<0) s++; - else { - s += i; - if (0<(i = wcwidth(wc))) { - if ((j += i)>columns) break; - ss = s; - } - } - } - - return ss-start; -} // Apply selections to an input line, producing output static void cut_line(char **pline, long len) @@ -99,15 +77,13 @@ static void cut_line(char **pline, long len) // crunch_str() currently assumes that combining characters get // escaped, to provide an unambiguous visual representation. // This assumes the input string is null terminated. 
- //if (start) crunch_str(&s, start, 0, 0, 0); - //if (!*s) continue; - //start = s-line; - //ss = s; - //crunch_str(&ss, count, 0, 0, 0); - //count = ss-s; - - s += unicolumns(s, start); - count = unicolumns(s, end-start); + if (start) crunch_str(&s, start, 0, 0, 0); + if (!*s) continue; + start = s-line; + ss = s; + crunch_str(&ss, count, 0, 0, 0); + count = ss-s; + } else if (toys.optflags&FLAG_c) { wchar_t wc; char *sss; -- cgit v1.2.3