Add xxd -p and -r.

SELinux denials include hex-encoded paths in the log messages; xxd -r -p is a convenient way to decode them. The heuristics are a little weird to my mind, but match the documentation and observed behavior.
author: Elliott Hughes <enh@google.com> 2016-02-18 21:11:07 -0800
committer: Rob Landley <rob@landley.net> 2016-02-19 11:54:06 -0600
commit: 577b7cabd4bae054e314484249f28088b5f385ab (patch)
tree: 31599e39ad9b50922657ee1c01549de6cee1c310
parent: 42cad120baa22bc043bcce6f5aac74fa26a01aef (diff)
download: toybox-577b7cabd4bae054e314484249f28088b5f385ab.tar.gz
2 files changed, 92 insertions, 7 deletions
diff --git a/tests/xxd.test b/tests/xxd.test
index e036865a..68f52be4 100644
--- a/tests/xxd.test
+++ b/tests/xxd.test
@@ -25,4 +25,23 @@ testing "xxd -c 8 -g 4 file1" "xxd -c 8 -g 4 file1" \
 testing "xxd -c 8 -g 3 file1" "xxd -c 8 -g 3 file1" \
     "00000000: 746869 732069 7320 this is \n00000008: 736f6d 652074 6578 some tex\n00000010: 740a               t.\n" "" ""
 
+testing "xxd -p" "xxd -p file1" "7468697320697320736f6d6520746578740a\n" "" ""
+
+testing "xxd -r" "xxd file1 | xxd -r" "this is some text\n" "" ""
+testing "xxd -r -p" "xxd -p file1 | xxd -r -p" "this is some text\n" "" ""
+
+testing "xxd -r garbage" "echo '0000: 68 65 6c6c 6fxxxx' | xxd -r -" "hello" "" ""
+
+# -r will only read -c bytes (default 16) before skipping to the next line,
+# ignoring the rest.
+testing "xxd -r long" \
+    "echo '0000: 40404040404040404040404040404040404040404040404040404040404040404040404040404040' | xxd -r -" \
+    "@@@@@@@@@@@@@@@@" "" ""
+
+# -r -p ignores the usual -p 30-byte/line limit (or any limit set by -c) and
+# will take as many bytes as you give it.
+testing "xxd -r -p long" \
+    "echo '40404040404040404040404040404040404040404040404040404040404040404040404040404040' | xxd -r -p -" \
+    "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" "" ""
+
 rm file1 file2
diff --git a/toys/other/xxd.c b/toys/other/xxd.c
index e9ad8393..0250f8f8 100644
--- a/toys/other/xxd.c
+++ b/toys/other/xxd.c
@@ -8,13 +8,13 @@
  * TODO: support for reversing a hexdump back into the original data.
  * TODO: -s seek
 
-USE_XXD(NEWTOY(xxd, ">1c#<1>4096=16l#g#<1=2", TOYFLAG_USR|TOYFLAG_BIN))
+USE_XXD(NEWTOY(xxd, ">1c#<1>4096=16l#g#<1=2pr", TOYFLAG_USR|TOYFLAG_BIN))
 
 config XXD
   bool "xxd"
   default y
   help
-    usage: xxd [-c n] [-g n] [-l n] [file]
+    usage: xxd [-c n] [-g n] [-l n] [-p] [-r] [file]
 
     Hexdump a file to stdout.  If no file is listed, copy from stdin.
     Filename "-" is a synonym for stdin.
@@ -22,6 +22,8 @@ config XXD
     -c n	Show n bytes per line (default 16).
     -g n	Group bytes by adding a ' ' every n bytes (default 2).
     -l n	Limit of n bytes before stopping (default is no limit).
+    -p	Plain hexdump (30 bytes/line, no grouping).
+    -r	Reverse operation: turn a hexdump into a binary file.
 */
 
 #define FOR_xxd
@@ -39,7 +41,7 @@ static void do_xxd(int fd, char *name)
   int i, len, space;
 
   while (0<(len = readall(fd, toybuf, (TT.l && TT.l-pos<TT.c)?TT.l-pos:TT.c))) {
-    printf("%08llx: ", pos);
+    if (!(toys.optflags&FLAG_p)) printf("%08llx: ", pos);
     pos += len;
     space = 2*TT.c+TT.c/TT.g+1;
 
@@ -51,15 +53,79 @@ static void do_xxd(int fd, char *name)
       }
     }
 
-    printf("%*s", space, "");
-    for (i=0; i<len; i++)
-      putchar((toybuf[i]>=' ' && toybuf[i]<='~') ? toybuf[i] : '.');
+    if (!(toys.optflags&FLAG_p)) {
+      printf("%*s", space, "");
+      for (i=0; i<len; i++)
+        putchar((toybuf[i]>=' ' && toybuf[i]<='~') ? toybuf[i] : '.');
+    }
     putchar('\n');
   }
   if (len<0) perror_exit("read");
 }
 
+static int dehex(char ch) // Value of one hex digit, or a negative code.
+{
+  if (ch >= '0' && ch <= '9') return ch - '0';
+  if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
+  if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; // was - 'a': negative
+  return (ch == '\n') ? -2 : -1; // -2: newline, -1: any other non-hex char
+}
+
+static void do_xxd_reverse(int fd, char *name) // -r: hexdump text -> stdout
+{
+  FILE *fp = xfdopen(fd, "r");
+
+  while (!feof(fp)) {
+    int col = 0;
+    int tmp;
+
+    // A non-plain hexdump line starts with a hex offset: seek stdout to it.
+    if (!(toys.optflags&FLAG_p)) {
+      long long pos;
+
+      if (fscanf(fp, "%llx: ", &pos) == 1) {
+        if (fseek(stdout, pos, SEEK_SET) != 0) {
+          // TODO: just write out zeros if non-seekable?
+          perror_exit("%s: seek failed", name);
+        }
+      }
+    }
+
+    // A plain hexdump can have as many bytes per line as you like,
+    // but a non-plain hexdump treats everything after the first TT.c
+    // bytes (col counts bytes decoded so far) as garbage.
+    while (toys.optflags&FLAG_p || col < TT.c) {
+      int n1, n2;
+
+      // Read two hex digits; negative means EOF, EOL or some non-hex...
+      if ((n1 = n2 = dehex(fgetc(fp))) < 0 || (n2 = dehex(fgetc(fp))) < 0) {
+        // Newline consumed: keep decoding digits from the following line.
+        if (n1 == -2 || n2 == -2) continue;
+        // Otherwise, stop decoding and skip the rest of this line below.
+        break;
+      }
+
+      fputc((n1 << 4) | (n2 & 0xf), stdout);
+      col++;
+
+      // Grouping: swallow one space if present, else push the char back.
+      tmp = fgetc(fp);
+      if (tmp != ' ') ungetc(tmp, fp);
+    }
+
+    // Skip anything else on this line (such as the ASCII dump).
+    while ((tmp = fgetc(fp)) != EOF && tmp != '\n')
+      ;
+  }
+  if (ferror(fp)) perror_msg_raw(name);
+
+  fclose(fp);
+}
+
 void xxd_main(void)
 {
-  loopfiles(toys.optargs, do_xxd);
+  // Plain style is 30 bytes/line, no grouping.
+  if (toys.optflags&FLAG_p) TT.c = TT.g = 30;
+
+  loopfiles(toys.optargs, toys.optflags&FLAG_r ? do_xxd_reverse : do_xxd);
 }
author	Elliott Hughes <enh@google.com>	2016-02-18 21:11:07 -0800
committer	Rob Landley <rob@landley.net>	2016-02-19 11:54:06 -0600
commit	577b7cabd4bae054e314484249f28088b5f385ab (patch)
tree	31599e39ad9b50922657ee1c01549de6cee1c310
parent	42cad120baa22bc043bcce6f5aac74fa26a01aef (diff)
download	toybox-577b7cabd4bae054e314484249f28088b5f385ab.tar.gz