aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jarno Mäkipää <jmakip87@gmail.com> 2019-10-18 20:21:57 +0300
committer Rob Landley <rob@landley.net> 2019-10-26 19:09:00 -0500
commit 01f18c4c6ee68cbd58944e21d1fe36991315a889 (patch)
tree 139ca0888bd373a8f48f3fbcc3b038f94ae31b60
parent b7265da4ccdfe4d256e72dc1b2a0f6b54e087ad2 (diff)
download toybox-01f18c4c6ee68cbd58944e21d1fe36991315a889.tar.gz
cut: re-enable crunch_str on cut -C
Reason: unicolumns() does not print combining characters correctly. Combining characters follow the character which they modify (see https://www.cl.cam.ac.uk/~mgk25/unicode.html#comb). xterm now renders the output of "cut test1.txt -C -1" correctly.
-rwxr-xr-x tests/cut.test 3
-rw-r--r-- toys/posix/cut.c 38
2 files changed, 10 insertions, 31 deletions
diff --git a/tests/cut.test b/tests/cut.test
index e475288a..8d8c4ba1 100755
--- a/tests/cut.test
+++ b/tests/cut.test
@@ -34,6 +34,9 @@ testing "-c a,b-c,d" "cut -c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" ""
toyonly testing "-c japan.txt" 'cut -c 3-6,9-12 "$FILES/utf8/japan.txt"' \
"ガラスをられます\n" "" ""
+toyonly testing "-C test1.txt" 'cut -C -1 "$FILES/utf8/test1.txt"' \
+ "l̴̗̞̠\n" "" ""
+
# substitute for awk
toyonly testcmd "-DF" "-DF 2,7,5" \
"said and your\nare\nis demand. supply\nforecast :\nyou you better,\n\nEm: Took hate\n" "" \
diff --git a/toys/posix/cut.c b/toys/posix/cut.c
index 9f7f7458..61b2b409 100644
--- a/toys/posix/cut.c
+++ b/toys/posix/cut.c
@@ -46,28 +46,6 @@ GLOBALS(
regex_t reg;
)
-// Return number of bytes to start of first column fitting in columns
-// invalid sequences are skipped/ignored
-int unicolumns(char *start, unsigned columns)
-{
- int i, j = 0;
- wchar_t wc;
- char *s = start, *ss = start;
-
- // Skip start, rounding down if we hit a multicolumn char
- while (j<columns && (i = utf8towc(&wc, s, 4))) {
- if (i<0) s++;
- else {
- s += i;
- if (0<(i = wcwidth(wc))) {
- if ((j += i)>columns) break;
- ss = s;
- }
- }
- }
-
- return ss-start;
-}
// Apply selections to an input line, producing output
static void cut_line(char **pline, long len)
@@ -99,15 +77,13 @@ static void cut_line(char **pline, long len)
// crunch_str() currently assumes that combining characters get
// escaped, to provide an unambiguous visual representation.
// This assumes the input string is null terminated.
- //if (start) crunch_str(&s, start, 0, 0, 0);
- //if (!*s) continue;
- //start = s-line;
- //ss = s;
- //crunch_str(&ss, count, 0, 0, 0);
- //count = ss-s;
-
- s += unicolumns(s, start);
- count = unicolumns(s, end-start);
+ if (start) crunch_str(&s, start, 0, 0, 0);
+ if (!*s) continue;
+ start = s-line;
+ ss = s;
+ crunch_str(&ss, count, 0, 0, 0);
+ count = ss-s;
+
} else if (toys.optflags&FLAG_c) {
wchar_t wc;
char *sss;