From f4c6375fad2a54770a05e1fffe3a39071fdf47fa Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Tue, 22 Jun 2021 09:55:58 -0500 Subject: Add support for -d $'\n' (cut by line!) and posix -nb (wraps to start of -c) --- tests/cut.test | 56 ++++++++++++++++++++++++++++++-------------------------- toys/posix/cut.c | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/tests/cut.test b/tests/cut.test index 8d8c4ba1..889fc186 100755 --- a/tests/cut.test +++ b/tests/cut.test @@ -13,29 +13,27 @@ echo "one:two:three:four:five:six:seven alpha:beta:gamma:delta:epsilon:zeta:eta:theta:iota:kappa:lambda:mu the quick brown fox jumps over the lazy dog" >abc.txt -testing "-b a,a,a" "cut -b 3,3,3 abc.txt" "e\np\ne\n" "" "" -testing "-b overlaps" "cut -b 1-3,2-5,7-9,9-10 abc.txt" \ +testcmd "-b a,a,a" "-b 3,3,3 abc.txt" "e\np\ne\n" "" "" +testcmd "-b overlaps" "-b 1-3,2-5,7-9,9-10 abc.txt" \ "one:to:th\nalphabeta\nthe qick \n" "" "" -testing "-b encapsulated" "cut -b 3-8,4-6 abc.txt" "e:two:\npha:be\ne quic\n" \ +testcmd "-b encapsulated" "-b 3-8,4-6 abc.txt" "e:two:\npha:be\ne quic\n" \ "" "" -testing "-bO overlaps" \ - "cut --output-delimiter ' ' -b 1-3,2-5,7-9,9-10 abc.txt" \ +testcmd "-bO overlaps" "--output-delimiter ' ' -b 1-3,2-5,7-9,9-10 abc.txt" \ "one:t o:th\nalpha beta\nthe q ick \n" "" "" -testing "high-low error" "cut -b 8-3 abc.txt 2>/dev/null || echo err" "err\n" \ +testcmd "high-low error" "-b 8-3 abc.txt 2>/dev/null || echo err" "err\n" \ "" "" -testing "-c a-b" "cut -c 4-10 abc.txt" ":two:th\nha:beta\n quick \n" "" "" -testing "-c a-" "cut -c 41- abc.txt" "\ntheta:iota:kappa:lambda:mu\ndog\n" "" "" -testing "-c -b" "cut -c -39 abc.txt" \ +testcmd "-c a-b" "-c 4-10 abc.txt" ":two:th\nha:beta\n quick \n" "" "" +testcmd "-c a-" "-c 41- abc.txt" "\ntheta:iota:kappa:lambda:mu\ndog\n" "" "" +testcmd "-c -b" "-c -39 abc.txt" \ "one:two:three:four:five:six:seven\nalpha:beta:gamma:delta:epsilon:zeta:eta\nthe quick brown fox jumps over the lazy\n" \ "" "" -testing "-c a" "cut -c 40 abc.txt" "\n:\n \n" "" "" -testing "-c a,b-c,d" "cut -c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" "" -toyonly testing "-c japan.txt" 'cut -c 3-6,9-12 "$FILES/utf8/japan.txt"' \ +testcmd "-c a" "-c 40 abc.txt" "\n:\n \n" "" "" +testcmd "-c a,b-c,d" "-c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" "" +toyonly testcmd "-c japan.txt" '-c 3-6,9-12 "$FILES/utf8/japan.txt"' \ "ガラスをられます\n" "" "" -toyonly testing "-C test1.txt" 'cut -C -1 "$FILES/utf8/test1.txt"' \ - "l̴̗̞̠\n" "" "" +toyonly testcmd "-C test1.txt" '-C -1 "$FILES/utf8/test1.txt"' "l̴̗̞̠\n" "" "" # substitute for awk toyonly testcmd "-DF" "-DF 2,7,5" \ @@ -47,24 +45,26 @@ Weather forecast for tonight : dark. Apple: you can buy better, but you can't pay more. Subcalifragilisticexpialidocious. Auntie Em: Hate you, hate Kansas. Took the dog. Dorothy." +toyonly testcmd "-DF 2" "-DF 7,1,3-6,2-5" \ + "seven one three four five six two three four five\n" "" \ + "one two three four five six seven eight nine\n" testcmd "empty field" "-d ':' -f 1-3" "a::b\n" "" "a::b\n" testcmd "empty field 2" "-d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n" -testing "-f a-" "cut -d ':' -f 5- abc.txt" "five:six:seven\nepsilon:zeta:eta:theta:iota:kappa:lambda:mu\nthe quick brown fox jumps over the lazy dog\n" "" "" +testcmd "-f a-" "-d ':' -f 5- abc.txt" "five:six:seven\nepsilon:zeta:eta:theta:iota:kappa:lambda:mu\nthe quick brown fox jumps over the lazy dog\n" "" "" -testing "show whole line with no delim" "cut -d ' ' -f 3 abc.txt" \ +testcmd "show whole line with no delim" "-d ' ' -f 3 abc.txt" \ "one:two:three:four:five:six:seven\nalpha:beta:gamma:delta:epsilon:zeta:eta:theta:iota:kappa:lambda:mu\nbrown\n" "" "" -testing "with echo, -c (a-b)" "echo 'ref_categorie=test' | cut -c 1-15 " "ref_categorie=t\n" "" "" -testing "with echo, -c (a)" "echo 'ref_categorie=test' | cut -c 14" "=\n" "" "" +testcmd "-c (a-b)" "-c 1-15 " "ref_categorie=t\n" "" "ref_categorie=test\n" +testcmd "-c (a)" "-c 14" "=\n" "" "ref_categorie=test\n" # Modifying abc.txt data as per new testcase echo "abcdefghijklmnopqrstuvwxyz" >abc.txt -testing "with -c (a,b,c)" "cut -c 4,5,20 abc.txt" "det\n" "" "" - -testing "with -b (a,b,c)" "cut -b 4,5,20 abc.txt" "det\n" "" "" +testcmd "-c (a,b,c)" "-c 4,5,20 abc.txt" "det\n" "" "" +testcmd "-b (a,b,c)" "-b 4,5,20 abc.txt" "det\n" "" "" # Modifying abc.txt data as per testcase echo "406378:Sales:Itorre:Jan @@ -72,13 +72,17 @@ echo "406378:Sales:Itorre:Jan 636496:Research:Ancholie:Mel 396082:Sales:Jucacion:Ed" >abc.txt -testing "with -d -f(:) -s" "cut -d: -f3 -s abc.txt" "Itorre\nNasium\nAncholie\nJucacion\n" "" "" - -testing "with -d -f( ) -s" "cut -d' ' -f3 -s abc.txt && echo yes" "yes\n" "" "" +testcmd "-d -f(:) -s" "-d: -f3 -s abc.txt" "Itorre\nNasium\nAncholie\nJucacion\n" "" "" +testcmd "-d -f( ) -s" "-d' ' -f3 -s abc.txt && echo yes" "yes\n" "" "" +testcmd "-d -f(a) -s" "-da -f3 -s abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" "" +testcmd "-d -f(a) -s -n" "-da -f3 -s -n abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" "" -testing "with -d -f(a) -s" "cut -da -f3 -s abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" "" +# Feature posix documents but nobody bothers to implement +toyonly testcmd "-nb" '-nb 8-17 "$FILES/utf8/japan.txt"' "ガラス\n" "" "" -testing "with -d -f(a) -s -n" "cut -da -f3 -s -n abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" "" +# Feature that is, as far as I can tell, totally undocumented? +testcmd "-d newline" "-d \$'\n' -f 2-3,5" "two\nthree\nfive\n" "" \ + 'one\ntwo\nthree\nfour\nfive\nsix\seven\n' # Removing abc.txt file for cleanup purpose rm abc.txt diff --git a/toys/posix/cut.c b/toys/posix/cut.c index 5072949d..4e9f56c1 100644 --- a/toys/posix/cut.c +++ b/toys/posix/cut.c @@ -42,26 +42,36 @@ GLOBALS( char *d, *O; struct arg_list *select[5]; // we treat them the same, so loop through + unsigned line; int pairs; regex_t reg; ) - // Apply selections to an input line, producing output static void cut_line(char **pline, long len) { - unsigned *pairs = (void *)toybuf; + unsigned *pairs = (void *)toybuf, wc; char *line; - int i, j; + int i, j, k; if (!pline) return; line = *pline; if (len && line[len-1]=='\n') line[--len] = 0; + TT.line++; // Loop through selections for (i=0; iend) { + if (i+1 == TT.pairs) return; + continue; + } + goto write_line; + } // input: start/end position, count=difference between them // output: s = start of string, len = bytes to output @@ -72,8 +82,20 @@ static void cut_line(char **pline, long len) count = end-start; // Find start and end of output string for the relevant selection type - if (FLAG(b)) s += start; - else if (FLAG(C)) { + if (FLAG(b)) { + if (!FLAG(n)) s += start; + else { + if (end>len) end = len; + for (sss = ss = s; (k = (ss-line))(j = utf8towc(&wc, ss, len))) ss++; + else { + if (((ss += j)-line)<=end) sss = ss; + if ((ss-line)<=start) s = ss; + } + } + if (!(count = sss-s)) continue; + } + } else if (FLAG(C)) { // crunch_str() currently assumes that combining characters get // escaped, to provide an unambiguous visual representation. // This assumes the input string is null terminated. @@ -85,8 +107,6 @@ static void cut_line(char **pline, long len) count = ss-s; } else if (FLAG(c)) { - unsigned wc; - char *sss; // Find start ss = line+len; @@ -132,6 +152,7 @@ static void cut_line(char **pline, long len) if (!j && end == start) { if (FLAG(D)) break; if (FLAG(s)) return; +write_line: fwrite(line, len, 1, stdout); break; } else if (!*s) continue; -- cgit v1.2.3