aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-x tests/cut.test 56
-rw-r--r-- toys/posix/cut.c 37
2 files changed, 59 insertions, 34 deletions
diff --git a/tests/cut.test b/tests/cut.test
index 8d8c4ba1..889fc186 100755
--- a/tests/cut.test
+++ b/tests/cut.test
@@ -13,29 +13,27 @@ echo "one:two:three:four:five:six:seven
alpha:beta:gamma:delta:epsilon:zeta:eta:theta:iota:kappa:lambda:mu
the quick brown fox jumps over the lazy dog" >abc.txt
-testing "-b a,a,a" "cut -b 3,3,3 abc.txt" "e\np\ne\n" "" ""
-testing "-b overlaps" "cut -b 1-3,2-5,7-9,9-10 abc.txt" \
+testcmd "-b a,a,a" "-b 3,3,3 abc.txt" "e\np\ne\n" "" ""
+testcmd "-b overlaps" "-b 1-3,2-5,7-9,9-10 abc.txt" \
"one:to:th\nalphabeta\nthe qick \n" "" ""
-testing "-b encapsulated" "cut -b 3-8,4-6 abc.txt" "e:two:\npha:be\ne quic\n" \
+testcmd "-b encapsulated" "-b 3-8,4-6 abc.txt" "e:two:\npha:be\ne quic\n" \
"" ""
-testing "-bO overlaps" \
- "cut --output-delimiter ' ' -b 1-3,2-5,7-9,9-10 abc.txt" \
+testcmd "-bO overlaps" "--output-delimiter ' ' -b 1-3,2-5,7-9,9-10 abc.txt" \
"one:t o:th\nalpha beta\nthe q ick \n" "" ""
-testing "high-low error" "cut -b 8-3 abc.txt 2>/dev/null || echo err" "err\n" \
+testcmd "high-low error" "-b 8-3 abc.txt 2>/dev/null || echo err" "err\n" \
"" ""
-testing "-c a-b" "cut -c 4-10 abc.txt" ":two:th\nha:beta\n quick \n" "" ""
-testing "-c a-" "cut -c 41- abc.txt" "\ntheta:iota:kappa:lambda:mu\ndog\n" "" ""
-testing "-c -b" "cut -c -39 abc.txt" \
+testcmd "-c a-b" "-c 4-10 abc.txt" ":two:th\nha:beta\n quick \n" "" ""
+testcmd "-c a-" "-c 41- abc.txt" "\ntheta:iota:kappa:lambda:mu\ndog\n" "" ""
+testcmd "-c -b" "-c -39 abc.txt" \
"one:two:three:four:five:six:seven\nalpha:beta:gamma:delta:epsilon:zeta:eta\nthe quick brown fox jumps over the lazy\n" \
"" ""
-testing "-c a" "cut -c 40 abc.txt" "\n:\n \n" "" ""
-testing "-c a,b-c,d" "cut -c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" ""
-toyonly testing "-c japan.txt" 'cut -c 3-6,9-12 "$FILES/utf8/japan.txt"' \
+testcmd "-c a" "-c 40 abc.txt" "\n:\n \n" "" ""
+testcmd "-c a,b-c,d" "-c 3,5-7,10 abc.txt" "etwoh\npa:ba\nequi \n" "" ""
+toyonly testcmd "-c japan.txt" '-c 3-6,9-12 "$FILES/utf8/japan.txt"' \
"ガラスをられます\n" "" ""
-toyonly testing "-C test1.txt" 'cut -C -1 "$FILES/utf8/test1.txt"' \
- "l̴̗̞̠\n" "" ""
+toyonly testcmd "-C test1.txt" '-C -1 "$FILES/utf8/test1.txt"' "l̴̗̞̠\n" "" ""
# substitute for awk
toyonly testcmd "-DF" "-DF 2,7,5" \
@@ -47,24 +45,26 @@ Weather forecast for tonight : dark.
Apple: you can buy better, but you can't pay more.
Subcalifragilisticexpialidocious.
Auntie Em: Hate you, hate Kansas. Took the dog. Dorothy."
+toyonly testcmd "-DF 2" "-DF 7,1,3-6,2-5" \
+ "seven one three four five six two three four five\n" "" \
+ "one two three four five six seven eight nine\n"
testcmd "empty field" "-d ':' -f 1-3" "a::b\n" "" "a::b\n"
testcmd "empty field 2" "-d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n"
-testing "-f a-" "cut -d ':' -f 5- abc.txt" "five:six:seven\nepsilon:zeta:eta:theta:iota:kappa:lambda:mu\nthe quick brown fox jumps over the lazy dog\n" "" ""
+testcmd "-f a-" "-d ':' -f 5- abc.txt" "five:six:seven\nepsilon:zeta:eta:theta:iota:kappa:lambda:mu\nthe quick brown fox jumps over the lazy dog\n" "" ""
-testing "show whole line with no delim" "cut -d ' ' -f 3 abc.txt" \
+testcmd "show whole line with no delim" "-d ' ' -f 3 abc.txt" \
"one:two:three:four:five:six:seven\nalpha:beta:gamma:delta:epsilon:zeta:eta:theta:iota:kappa:lambda:mu\nbrown\n" "" ""
-testing "with echo, -c (a-b)" "echo 'ref_categorie=test' | cut -c 1-15 " "ref_categorie=t\n" "" ""
-testing "with echo, -c (a)" "echo 'ref_categorie=test' | cut -c 14" "=\n" "" ""
+testcmd "-c (a-b)" "-c 1-15 " "ref_categorie=t\n" "" "ref_categorie=test\n"
+testcmd "-c (a)" "-c 14" "=\n" "" "ref_categorie=test\n"
# Modifying abc.txt data as per new testcase
echo "abcdefghijklmnopqrstuvwxyz" >abc.txt
-testing "with -c (a,b,c)" "cut -c 4,5,20 abc.txt" "det\n" "" ""
-
-testing "with -b (a,b,c)" "cut -b 4,5,20 abc.txt" "det\n" "" ""
+testcmd "-c (a,b,c)" "-c 4,5,20 abc.txt" "det\n" "" ""
+testcmd "-b (a,b,c)" "-b 4,5,20 abc.txt" "det\n" "" ""
# Modifying abc.txt data as per testcase
echo "406378:Sales:Itorre:Jan
@@ -72,13 +72,17 @@ echo "406378:Sales:Itorre:Jan
636496:Research:Ancholie:Mel
396082:Sales:Jucacion:Ed" >abc.txt
-testing "with -d -f(:) -s" "cut -d: -f3 -s abc.txt" "Itorre\nNasium\nAncholie\nJucacion\n" "" ""
-
-testing "with -d -f( ) -s" "cut -d' ' -f3 -s abc.txt && echo yes" "yes\n" "" ""
+testcmd "-d -f(:) -s" "-d: -f3 -s abc.txt" "Itorre\nNasium\nAncholie\nJucacion\n" "" ""
+testcmd "-d -f( ) -s" "-d' ' -f3 -s abc.txt && echo yes" "yes\n" "" ""
+testcmd "-d -f(a) -s" "-da -f3 -s abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" ""
+testcmd "-d -f(a) -s -n" "-da -f3 -s -n abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" ""
-testing "with -d -f(a) -s" "cut -da -f3 -s abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" ""
+# Feature POSIX documents but nobody bothers to implement
+toyonly testcmd "-nb" '-nb 8-17 "$FILES/utf8/japan.txt"' "ガラス\n" "" ""
-testing "with -d -f(a) -s -n" "cut -da -f3 -s -n abc.txt" "n\nsium:Jim\n\ncion:Ed\n" "" ""
+# Feature that is, as far as I can tell, totally undocumented?
+testcmd "-d newline" "-d \$'\n' -f 2-3,5" "two\nthree\nfive\n" "" \
+ 'one\ntwo\nthree\nfour\nfive\nsix\seven\n'
# Removing abc.txt file for cleanup purpose
rm abc.txt
diff --git a/toys/posix/cut.c b/toys/posix/cut.c
index 5072949d..4e9f56c1 100644
--- a/toys/posix/cut.c
+++ b/toys/posix/cut.c
@@ -42,26 +42,36 @@ GLOBALS(
char *d, *O;
struct arg_list *select[5]; // we treat them the same, so loop through
+ unsigned line;
int pairs;
regex_t reg;
)
-
// Apply selections to an input line, producing output
static void cut_line(char **pline, long len)
{
- unsigned *pairs = (void *)toybuf;
+ unsigned *pairs = (void *)toybuf, wc;
char *line;
- int i, j;
+ int i, j, k;
if (!pline) return;
line = *pline;
if (len && line[len-1]=='\n') line[--len] = 0;
+ TT.line++;
// Loop through selections
for (i=0; i<TT.pairs; i++) {
unsigned start = pairs[2*i], end = pairs[(2*i)+1], count;
- char *s = line, *ss;
+ char *s = line, *ss, *sss;
+
+ // When the delimiter is \n, select whole input lines by line number.
+ if (*TT.d == '\n') {
+ if (TT.line<start || TT.line>end) {
+ if (i+1 == TT.pairs) return;
+ continue;
+ }
+ goto write_line;
+ }
// input: start/end position, count=difference between them
// output: s = start of string, len = bytes to output
@@ -72,8 +82,20 @@ static void cut_line(char **pline, long len)
count = end-start;
// Find start and end of output string for the relevant selection type
- if (FLAG(b)) s += start;
- else if (FLAG(C)) {
+ if (FLAG(b)) {
+ if (!FLAG(n)) s += start;
+ else {
+ if (end>len) end = len;
+ for (sss = ss = s; (k = (ss-line))<end;) {
+ if (0>(j = utf8towc(&wc, ss, len))) ss++;
+ else {
+ if (((ss += j)-line)<=end) sss = ss;
+ if ((ss-line)<=start) s = ss;
+ }
+ }
+ if (!(count = sss-s)) continue;
+ }
+ } else if (FLAG(C)) {
// crunch_str() currently assumes that combining characters get
// escaped, to provide an unambiguous visual representation.
// This assumes the input string is null terminated.
@@ -85,8 +107,6 @@ static void cut_line(char **pline, long len)
count = ss-s;
} else if (FLAG(c)) {
- unsigned wc;
- char *sss;
// Find start
ss = line+len;
@@ -132,6 +152,7 @@ static void cut_line(char **pline, long len)
if (!j && end == start) {
if (FLAG(D)) break;
if (FLAG(s)) return;
+write_line:
fwrite(line, len, 1, stdout);
break;
} else if (!*s) continue;