diff options
-rw-r--r-- | toys/pending/file.c | 232 |
1 files changed, 97 insertions, 135 deletions
diff --git a/toys/pending/file.c b/toys/pending/file.c index 1ebafdb0..2c2f2864 100644 --- a/toys/pending/file.c +++ b/toys/pending/file.c @@ -22,177 +22,143 @@ GLOBALS( int max_name_len; ) -// TODO: all the ELF magic numbers are available in <elf.h> --- use that? - -static char *elf_arch(int e_machine) -{ - // TODO: include obsolete stuff we'll never see, like "PDP-10" and "VAX"? - switch (e_machine) { - case 3: return "Intel 80386"; - case 8: return "MIPS"; - case 40: return "ARM"; - case 62: return "x86-64"; - case 183: return "ARM aarch64"; - default: return NULL; - } -} - -static int64_t elf_int(int endian, char *data, int bytes) -{ - if (endian == 1) return peek_le(data, bytes); - return peek_be(data, bytes); -} - static void do_elf_file() { - int elf_endian = toybuf[5]; - int e_type, e_machine; - - xprintf("ELF"); + int elf_endian = toybuf[5], e_type, e_machine, i; + int64_t (*elf_int)(void *ptr, unsigned size) = peek_le; + // Values from include/linux/elf-em.h (plus arch/*/include/asm/elf.h) + // Names are linux/arch/ directory name + struct {int val; char *name;} type[] = {{0x9026, "alpha"}, + {40, "arm"}, {183, "arm"}, {0x18ad, "avr32"}, {106, "blackfin"}, + {76, "cris"}, {0x5441, "frv"}, {46, "h8300"}, {50, "ia64"},//ia intel ftaghn + {88, "m32r"}, {4, "m68k"}, {0xbaab, "microblaze"}, {8, "mips"}, + {10, "mips"}, {89, "mn10300"}, {15, "parisc"}, {22, "s390"}, + {135, "score"}, {42, "sh"}, {2, "sparc"}, {18, "sparc"}, {43, "sparc"}, + {187, "tile"}, {188, "tile"}, {191, "tile"}, {3, "x86"}, {6, "x86"}, + {62, "x86"}, {94, "xtensa"}, {0xabc7, "xtensa"}}; + + xprintf("ELF "); // "64-bit" - if (toybuf[4] == 1) xprintf(" 32-bit"); - else if (toybuf[4] == 2) xprintf(" 64-bit"); - else xprintf(" (invalid class %d)", toybuf[4]); + if (toybuf[4] == 1) xprintf("32-bit "); + else if (toybuf[4] == 2) xprintf("64-bit "); + else xprintf("(bad class %d)", toybuf[4]); // "LSB" - if (elf_endian == 1) xprintf(" LSB"); - else if (elf_endian == 2) xprintf(" MSB"); - else xprintf("(invalid endian %d) ", elf_endian); - - if (elf_endian == 1 || elf_endian == 2) { - char *arch; - - // ", executable" - e_type = elf_int(elf_endian, &toybuf[0x10], 2); - if (e_type == 1) xprintf(" relocatable"); - else if (e_type == 2) xprintf(" executable"); - else if (e_type == 3) xprintf(" shared object"); - else if (e_type == 4) xprintf(" core dump"); - else xprintf(" (invalid type %d)", e_type); - - // ", x86-64" - e_machine = elf_int(elf_endian, &toybuf[0x12], 2); - arch = elf_arch(e_machine); - if (arch) xprintf(", %s", arch); - else xprintf(", (unknown arch %d)", e_machine); + if (elf_endian == 1) xprintf("LSB "); + else if (elf_endian == 2) { + xprintf("MSB "); + elf_int = peek_be; + } else { + xprintf("(bad endian %d)\n", elf_endian); + + // At this point we can't parse remaining fields. + return; } + // ", executable" + e_type = elf_int(&toybuf[0x10], 2); + if (e_type == 1) xprintf("relocatable"); + else if (e_type == 2) xprintf("executable"); + else if (e_type == 3) xprintf("shared object"); + else if (e_type == 4) xprintf("core dump"); + else xprintf("(invalid type %d)", e_type); + + // ", x86-64" + e_machine = elf_int(&toybuf[0x12], 2); + for (i = 0; i<ARRAY_LEN(type); i++) if (e_machine == type[i].val) break; + if (i<ARRAY_LEN(type)) xprintf(", %s", type[i].name); + else xprintf(", (unknown arch %d)", e_machine); + // "version 1" xprintf(", version %d", toybuf[6]); // " (SYSV)" // TODO: will we ever meet any of the others in practice? - if (toybuf[7] == 0) xprintf(" (SYSV)"); + if (!toybuf[7]) xprintf(" (SYSV)"); else xprintf(" (OS %d)", toybuf[7]); // TODO: we'd need to actually parse the ELF file to report the rest... // ", dynamically linked" // " (uses shared libs)" - // ", for GNU/Linux 2.6.24" + // ", for Linux 2.6.24" // ", BuildID[sha1]=SHA" // ", stripped" - xputs(""); + xputc('\n'); } -// https://www.w3.org/TR/PNG/#6Colour-values -static char *png_color_type(int color_type) +static void do_regular_file(int fd, char *name) { - switch (color_type) { - case 0: return "grayscale"; - case 2: return "color RGB"; - case 3: return "indexed color"; - case 4: return "grayscale with alpha"; - case 6: return "color RGBA"; - default: return "unknown"; - } -} + char *s; + int len = read(fd, s = toybuf, sizeof(toybuf)-256); -static void do_png_file() -{ - // PNG is big-endian: https://www.w3.org/TR/PNG/#7Integers-and-byte-order - int chunk_length = peek_be(&toybuf[8], 4); - - xprintf("PNG image data"); - - // The IHDR chunk comes first. - // https://www.w3.org/TR/PNG/#11IHDR - if (chunk_length == 13 && memcmp(&toybuf[12], "IHDR", 4) == 0) { - int width = peek_be(&toybuf[16], 4); - int height = peek_be(&toybuf[20], 4); - int bits = toybuf[24] & 0xff; - int type = toybuf[25] & 0xff; - int interlaced = toybuf[28] & 0xff; - - xprintf(", %d x %d, %d-bit/%s, %s", width, height, bits, - png_color_type(type), - interlaced ? "interlaced" : "non-interlaced"); - } + if (len<0) perror_msg("%s", name); - xputs(""); -} + if (len>20 && strstart(&s, "\177ELF")) { + do_elf_file(len); + } else if (len>28 && strstart(&s, "\x89PNG\x0d\x0a\x1a\x0a")) { + // PNG is big-endian: https://www.w3.org/TR/PNG/#7Integers-and-byte-order + int chunk_length = peek_be(s, 4); -static void do_gif_file() -{ - // https://www.w3.org/Graphics/GIF/spec-gif89a.txt - int width = peek_le(&toybuf[6], 2); - int height = peek_le(&toybuf[8], 2); + xprintf("PNG image data"); - xprintf("GIF image data, %d x %d\n", width, height); -} + // The IHDR chunk comes first: https://www.w3.org/TR/PNG/#11IHDR + s += 4; + if (chunk_length == 13 && strstart(&s, "IHDR")) { + // https://www.w3.org/TR/PNG/#6Colour-values + char *c = 0, *colors[] = {"grayscale", 0, "color RGB", "indexed color", + "grayscale with alpha", 0, "color RGBA"}; -static void do_jpeg_file() -{ - // TODO: parsing JPEG for width/height is harder than GIF or PNG. - xprintf("JPEG image data\n"); -} + if (s[9]<ARRAY_LEN(colors)) c = colors[s[9]]; + if (!c) c = "unknown"; -static void do_java_class_file() -{ - // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html - int minor = peek_be(&toybuf[4], 2); - int major = peek_be(&toybuf[6], 2); + xprintf(", %d x %d, %d-bit/%s, %sinterlaced", (int)peek_be(s, 4), + (int)peek_be(s+4, 4), s[8], c, s[12] ? "" : "non-"); + } - xprintf("Java class file, version %d.%d\n", major, minor); -} + xputc('\n'); -static void do_regular_file(int fd, char *name) -{ - int len = read(fd, toybuf, sizeof(toybuf)); + // https://www.w3.org/Graphics/GIF/spec-gif89a.txt + } else if (len>16 && (strstart(&s, "GIF87a") || strstart(&s, "GIF89a"))) + xprintf("GIF image data, %d x %d\n", + (int)peek_le(s, 2), (int)peek_le(s+8, 2)); - if (len<0) perror_msg("cannot open"); + // TODO: parsing JPEG for width/height is harder than GIF or PNG. + else if (len>32 && memcmp(toybuf, "\xff\xd8", 2) == 0) + xprintf("JPEG image data\n"); - if (len>20 && memcmp(toybuf, "\177ELF", 4) == 0) { - do_elf_file(len); - } else if (len>28 && memcmp(toybuf, "\x89PNG\x0d\x0a\x1a\x0a", 8) == 0) { - do_png_file(); - } else if (len>16 && (memcmp(toybuf, "GIF87a", 6) == 0 || - memcmp(toybuf, "GIF89a", 6) == 0)) { - do_gif_file(); - } else if (len>32 && memcmp(toybuf, "\xff\xd8", 2) == 0) { - do_jpeg_file(); - } else if (len>8 && memcmp(toybuf, "\xca\xfe\xba\xbe", 4) == 0) { - do_java_class_file(); + // https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html + else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe")) + xprintf("Java class file, version %d.%d\n", + (int)peek_be(s+6, 2), (int)peek_be(s, 2)); // TODO: cpio archive. // TODO: tar archive. // TODO: zip/jar/apk archive. - } else { - char *what = "ASCII text"; - int i; + else { + char *what = 0; + int i, bytes; - // TODO: report which interpreter? - if (strncmp(toybuf, "#!", 2) == 0) what = "commands text"; + // If shell script, report which interpreter + if (len>3 && strstart(&s, "#!")) { + for (what = s; (s-toybuf)<len && !isspace(*s); s++); + strcpy(s, " script"); - // TODO: try UTF-8 too before falling back to "data". - for (i = 0; i < len; ++i) { + // Distinguish ASCII text, UTF-8 text, or data + } else for (i = 0; i<len; ++i) { if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) { - what = "data"; - break; + wchar_t wc; + if ((bytes = mbrtowc(&wc, s+i, len-i, 0))>0 && wcwidth(wc)>=0) { + i += bytes-1; + if (!what) what = "UTF-8 text"; + } else { + what = "data"; + break; + } } } - xputs(what); + xputs(what ? what : "ASCII text"); } } @@ -219,19 +185,15 @@ static void do_file(int fd, char *name) xputs(what); } -static void init_max_name_len() +void file_main(void) { char **name; - int name_len; for (name = toys.optargs; *name; ++name) { - name_len = strlen(*name); + int name_len = strlen(*name); + if (name_len > TT.max_name_len) TT.max_name_len = name_len; } -} -void file_main(void) -{ - init_max_name_len(); loopfiles(toys.optargs, do_file); } |