From a3da7efae55c6e2d3ff66fdc476b35f5bbec09e5 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Fri, 27 Dec 2019 12:51:09 -0800 Subject: Implement readelf(1). Basic readelf(1) implementation, with output close enough to the binutils version to be usable with scripts that expect the binutils version. This started as an implementation of nm(1) until I realized that I almost always want readelf instead, and that you actually have to do much of the work needed for readelf just to implement nm. Arguably nm (being part of POSIX) belongs in toybox while readelf doesn't. An argument could also be made that neither really belongs in toybox, belonging in a separate set of development tools (such as binutils or the LLVM binutils). Doesn't support most of the architecture-specific stuff, most notably relocations, but is aware of things like ARM exidx sections and the common register state notes in core dumps for the "big four" architectures: arm, arm64, x86, and x86-64. Doesn't support symbol versions (but probably should). Doesn't support section groups or the -t "section details" (which is a long form of -S "section headers" that I've never seen used in practice and which isn't part of -a). Doesn't support dumping unwind info or the hash table bucket histograms. Reuses the table of ELF architectures from file(1). Not fuzzed, but successfully parses all the ELF files in my Ubuntu 18.04 system's lib directories. Attempts to exit with an error when presented with an invalid ELF file rather than struggle on as binutils seems to. --- toys/pending/readelf.c | 589 +++++++++++++++++++++++++++++++++++++++++++++++++ toys/posix/file.c | 22 +- 2 files changed, 591 insertions(+), 20 deletions(-) create mode 100644 toys/pending/readelf.c (limited to 'toys') diff --git a/toys/pending/readelf.c b/toys/pending/readelf.c new file mode 100644 index 00000000..838e37d7 --- /dev/null +++ b/toys/pending/readelf.c @@ -0,0 +1,589 @@ +/* readelf.c - display information about ELF files. 
// Decoded ELF section header, as filled in by get_sh().
struct sh {
  // Read as 4-byte fields regardless of ELF class.
  int type;
  int link;
  int info;
  // Read as 4-byte (ELF32) or 8-byte (ELF64) fields.
  long flags;
  long addr;
  long offset;
  long size;
  long addralign;
  long entsize;
  // Points into the section header string table, or "?" if there is none.
  char *name;
};
+struct ph { + int type, flags; + long offset, vaddr, paddr, filesz, memsz, align; +}; + +static void get_sh(int i, struct sh *s) +{ + char *shdr = TT.elf+TT.shoff+i*TT.shentsize; + + if (i >= TT.shnum || shdr > TT.elf+TT.size-TT.shentsize) { + error_exit("%s: bad shdr %d",TT.f,i); + } + + s->type = TT.elf_int(shdr+4, 4); + s->flags = TT.elf_int(shdr+8, 4*(TT.bits+1)); + s->addr = TT.elf_int(shdr+8+4*(TT.bits+1), 4*(TT.bits+1)); + s->offset = TT.elf_int(shdr+8+8*(TT.bits+1), 4*(TT.bits+1)); + s->size = TT.elf_int(shdr+8+12*(TT.bits+1), 4*(TT.bits+1)); + s->link = TT.elf_int(shdr+8+16*(TT.bits+1), 4); + s->info = TT.elf_int(shdr+12+16*(TT.bits+1), 4); + s->addralign = TT.elf_int(shdr+16+16*(TT.bits+1), 4*(TT.bits+1)); + s->entsize = TT.elf_int(shdr+16+20*(TT.bits+1), 4*(TT.bits+1)); + + if (!TT.shstrtab) s->name = "?"; + else { + s->name = TT.shstrtab + TT.elf_int(shdr, 4); + if (s->name >= TT.elf+TT.size) error_exit("%s: bad shdr name %d",TT.f,i); + if (s->offset >= TT.size-s->size && s->type != 8 /*SHT_NOBITS*/) + error_exit("%s: bad section %d",TT.f,i); + } +} + +static int find_section(char *spec, struct sh *s) +{ + char *end; + int i; + + // Valid section number? + errno = 0; + i = strtoul(spec, &end, 0); + if (!errno && !*end && i < TT.shnum) { + get_sh(i, s); + return 1; + } + + // Search the section names. + for (i=0; iname, spec)) return 1; + } + + error_msg("%s: no section '%s", TT.f, spec); + return 0; +} + +static void get_ph(int i, struct ph *ph) +{ + char *phdr = TT.elf+TT.phoff+i*TT.phentsize; + + if (phdr > TT.elf+TT.size-TT.phentsize) error_exit("%s: bad phdr %d",TT.f,i); + + // Elf64_Phdr reordered fields. 
+ ph->type = TT.elf_int(phdr, 4); + if (TT.bits) { + ph->flags = TT.elf_int(phdr+=4, 4); + ph->offset = TT.elf_int(phdr+=4, 8); + ph->vaddr = TT.elf_int(phdr+=8, 8); + ph->paddr = TT.elf_int(phdr+=8, 8); + ph->filesz = TT.elf_int(phdr+=8, 8); + ph->memsz = TT.elf_int(phdr+=8, 8); + ph->align = TT.elf_int(phdr+=8, 8); + } else { + ph->offset = TT.elf_int(phdr+=4, 4); + ph->vaddr = TT.elf_int(phdr+=4, 4); + ph->paddr = TT.elf_int(phdr+=4, 4); + ph->filesz = TT.elf_int(phdr+=4, 4); + ph->memsz = TT.elf_int(phdr+=4, 4); + ph->flags = TT.elf_int(phdr+=4, 4); + ph->align = TT.elf_int(phdr+=4, 4); + } +} + +#define MAP(...) __VA_ARGS__ +#define DECODER(name, values) \ + static char *name(int type) { \ + static char unknown[20]; \ + struct {int v; char *s;} a[] = values; \ + int i; \ + \ + for (i=0; ioffset, *ndx; + int sym_size = (TT.bits ? 24 : 16), numsym = table->size/sym_size, i; + + if (numsym == 0) return; + + xputc('\n'); + printf("Symbol table '%s' contains %d entries:\n" + " Num: %*s Size Type Bind Vis Ndx Name\n", + table->name, numsym, 5+8*TT.bits, "Value"); + for (i=0; ioffset + st_name; + if (name >= TT.elf+TT.size) error_exit("%s: bad symbol name", TT.f); + + if (!st_shndx) ndx = "UND"; + else if (st_shndx==0xfff1) ndx = "ABS"; + else sprintf(ndx = toybuf, "%d", st_shndx); + + // TODO: look up and show any symbol versions with @ or @@. 
+ + printf("%6d: %0*x %5ld %-7s %-6s %-9s%3s %s\n", i, 8*(TT.bits+1), + st_value, st_size, stt_type(st_info & 0xf), stb_type(st_info >> 4), + stv_type(st_other & 3), ndx, name); + symtab += sym_size; + } +} + +static void show_notes(long offset, long size) +{ + char *note = TT.elf + offset; + + printf(" %-20s %10s\tDescription\n", "Owner", "Data size"); + while (note < TT.elf+offset+size) { + int namesz = TT.elf_int(note, 4), descsz = TT.elf_int(note+4, 4), + type = TT.elf_int(note+8, 4), j = 0; + char *name = note+12; + + printf(" %-20.*s 0x%08x\t", namesz, name, descsz); + if (!memcmp(name, "GNU", 4)) { + if (type == 1) { + printf("NT_GNU_ABI_TAG\tOS: %s, ABI: %ld.%ld.%ld", + !TT.elf_int(note+16, 4)?"Linux":"?", + TT.elf_int(note+20, 4), TT.elf_int(note+24, 4), + TT.elf_int(note+28, 4)), j=1; + } else if (type == 3) { + printf("NT_GNU_BUILD_ID\t"); + for (;j=132) printf(", NDK %.64s (%.64s)",note+24,note+24+64); + } + } else if (!memcmp(name, "CORE", 5) || !memcmp(name, "LINUX", 6)) { + char *desc = *name=='C' ? nt_type_core(type) : nt_type_linux(type); + + if (*desc != '0') printf("%s", desc), j=1; + } + + // If we didn't do custom output above, show a hex dump. + if (!j) { + printf("0x%x\t", type); + for (;j 1 || endian < 1 || endian > 2 || version != 1) { + error_msg("%s: bad ELF", TT.f); + return; + } + + elf_type = TT.elf_int(TT.elf+16, 2); + entry = TT.elf_int(TT.elf+24, 4+4*TT.bits); + TT.phoff = TT.elf_int(TT.elf+28+4*TT.bits, 4+4*TT.bits); + TT.shoff = TT.elf_int(TT.elf+32+8*TT.bits, 4+4*TT.bits); + flags = TT.elf_int(TT.elf+36+12*TT.bits, 4); + ehsize = TT.elf_int(TT.elf+40+12*TT.bits, 2); + TT.phentsize = TT.elf_int(TT.elf+42+12*TT.bits, 2); + phnum = TT.elf_int(TT.elf+44+12*TT.bits, 2); + TT.shentsize = TT.elf_int(TT.elf+46+12*TT.bits, 2); + TT.shnum = TT.elf_int(TT.elf+48+12*TT.bits, 2); + shstrndx = TT.elf_int(TT.elf+50+12*TT.bits, 2); + + // Set up the section header string table so we can use section header names. 
+ // Core files have shstrndx == 0. + TT.shstrtab = 0; + if (shstrndx != 0) { + get_sh(shstrndx, &shstr); + if (shstr.type != 3 /*SHT_STRTAB*/) { + error_msg("%s: bad shstrndx", TT.f); + return; + } + TT.shstrtab = TT.elf+shstr.offset; + } + + if (toys.optc > 1) printf("\nFile: %s\n", TT.f); + + if (FLAG(h)) { + printf("ELF Header:\n"); + printf(" Magic: "); + for (i=0; i<16; i++) printf("%02x%c", TT.elf[i], i==15?'\n':' '); + printf(" Class: ELF%d\n", TT.bits?64:32); + printf(" Data: 2's complement, %s endian\n", + (endian==2)?"big":"little"); + printf(" Version: 1 (current)\n"); + printf(" OS/ABI: %s\n", os_abi(TT.elf[7])); + printf(" ABI Version: %d\n", TT.elf[8]); + printf(" Type: %s\n", et_type(elf_type)); + printf(" Machine: %s\n", + elf_arch_name(TT.elf_int(TT.elf+18, 2))); + printf(" Version: 0x%x\n", + (int) TT.elf_int(TT.elf+20, 4)); + printf(" Entry point address: 0x%x\n", entry); + printf(" Start of program headers: %ld (bytes into file)\n", + TT.phoff); + printf(" Start of section headers: %ld (bytes into file)\n", + TT.shoff); + printf(" Flags: 0x%x\n", flags); + printf(" Size of this header: %d (bytes)\n", ehsize); + printf(" Size of program headers: %d (bytes)\n", TT.phentsize); + printf(" Number of program headers: %d\n", phnum); + printf(" Size of section headers: %d (bytes)\n", TT.shentsize); + printf(" Number of section headers: %d\n", TT.shnum); + printf(" Section header string table index: %d\n", shstrndx); + } + + w = 8*(TT.bits+1); + if (FLAG(S)) { + if (!TT.shnum) printf("\nThere are no sections in this file.\n"); + else { + if (!FLAG(h)) { + printf("There are %d section headers, starting at offset %#lx:\n", + TT.shnum, TT.shoff); + } + printf("\n" + "Section Headers:\n" + " [Nr] %-20s %-14s %-*s %-6s %-6s ES Flg Lk Inf Al\n", + "Name", "Type", w, "Address", "Off", "Size"); + } + } + // We need to iterate through the section headers even if we're not + // dumping them, to find specific sections. 
+ for (i=0; i= ph.offset && s.offset+s.size <= ph.offset+ph.filesz) + printf(" %s", s.name); + } + xputc('\n'); + } + } + } + + // binutils ld emits a bunch of extra DT_NULL entries, so binutils readelf + // uses two passes here! We just tell the truth, which matches -h. + if (FLAG(d)) { + char *dyn = TT.elf+dynamic.offset, *end = dyn+dynamic.size; + + xputc('\n'); + if (!dynamic.size) printf("There is no dynamic section in this file.\n"); + else printf("Dynamic section at offset 0x%lx contains %ld entries:\n" + " %-*s %-20s %s\n", + dynamic.offset, dynamic.size/dynamic.entsize, + w+2, "Tag", "Type", "Name/Value"); + for (; dyn < end; dyn += dynamic.entsize) { + int es = 4*(TT.bits+1); + long tag = TT.elf_int(dyn, es), val = TT.elf_int(dyn+es, es); + char *type = dt_type(tag); + + printf(" 0x%0*lx %-20s ", w, tag, *type=='0' ? type : type+1); + if (*type == 'd') printf("%ld\n", val); + else if (*type == 'b') printf("%ld (bytes)\n", val); + else if (*type == 's') printf("%s\n", TT.elf+dynstr.offset+val); + else if (*type == 'f' || *type == 'F') { + struct bitname { int bit; char *s; } + df_names[] = {{0, "ORIGIN"},{1,"SYMBOLIC"},{2,"TEXTREL"}, + {3,"BIND_NOW"},{4,"STATIC_TLS"},{}}, + df_1_names[]={{0,"NOW"},{1,"GLOBAL"},{2,"GROUP"},{3,"NODELETE"}, + {5,"INITFIRST"},{27,"PIE"},{}}, + *names = *type == 'f' ? df_names : df_1_names; + int mask; + + if (*type == 'F') printf("Flags: "); + for (j=0; names[j].s; j++) { + if (val & (mask=(1<=' ' && *p<='~' ? 
*p : '.'); + xputc('\n'); + } + printf("\n"); + } + } + + if (FLAG(p)) { + if (find_section(TT.p, &s)) { + char *begin = TT.elf+s.offset, *end = begin + s.size, *p = begin; + int any = 0; + + printf("\nString dump of section '%s':\n", s.name); + for (; p < end; p++) { + if (isprint(*p)) { + printf(" [%6tx] ", p-begin); + while (p < end && isprint(*p)) putchar(*p++); + xputc('\n'); + any=1; + } + } + if (!any) printf(" No strings found in this section.\n"); + printf("\n"); + } + } +} + +void readelf_main(void) +{ + char **arg; + int all = FLAG_d|FLAG_h|FLAG_l|FLAG_n|FLAG_S|FLAG_s|FLAG_dyn_syms; + + if (FLAG(a)) toys.optflags |= all; + if (FLAG(s)) toys.optflags |= FLAG_dyn_syms; + if (!(toys.optflags & (all|FLAG_p|FLAG_x))) help_exit("needs a flag"); + + for (arg = toys.optargs; *arg; arg++) { + int fd = open(TT.f = *arg, O_RDONLY); + struct stat sb; + + if (fd == -1) perror_msg("%s", TT.f); + else { + if (fstat(fd, &sb)) perror_msg("%s", TT.f); + else if (!sb.st_size) error_msg("%s: empty", TT.f); + else if (!S_ISREG(sb.st_mode)) error_msg("%s: not a regular file",TT.f); + else { + TT.elf = xmmap(NULL, TT.size=sb.st_size, PROT_READ, MAP_SHARED, fd, 0); + scan_elf(); + munmap(TT.elf, TT.size); + } + close(fd); + } + } +} diff --git a/toys/posix/file.c b/toys/posix/file.c index 5d6eef12..f2cd3af8 100644 --- a/toys/posix/file.c +++ b/toys/posix/file.c @@ -36,21 +36,6 @@ static void do_elf_file(int fd) int endian = toybuf[5], bits = toybuf[4], i, j, dynamic = 0, stripped = 1, phentsize, phnum, shsize, shnum; int64_t (*elf_int)(void *ptr, unsigned size); - // Values from include/linux/elf-em.h (plus arch/*/include/asm/elf.h) - // Names are linux/arch/ directory (sometimes before 32/64 bit merges) - struct {int val; char *name;} type[] = {{0x9026, "alpha"}, {93, "arc"}, - {195, "arcv2"}, {40, "arm"}, {183, "arm64"}, {0x18ad, "avr32"}, - {247, "bpf"}, {106, "blackfin"}, {140, "c6x"}, {23, "cell"}, {76, "cris"}, - {252, "csky"}, {0x5441, "frv"}, {46, "h8300"}, {164, 
"hexagon"}, - {50, "ia64"}, {88, "m32r"}, {0x9041, "m32r"}, {4, "m68k"}, {174, "metag"}, - {189, "microblaze"}, {0xbaab, "microblaze-old"}, {8, "mips"}, - {10, "mips-old"}, {89, "mn10300"}, {0xbeef, "mn10300-old"}, {113, "nios2"}, - {92, "openrisc"}, {0x8472, "openrisc-old"}, {15, "parisc"}, {20, "ppc"}, - {21, "ppc64"}, {243, "riscv"}, {22, "s390"}, {0xa390, "s390-old"}, - {135, "score"}, {42, "sh"}, {2, "sparc"}, {18, "sparc8+"}, {43, "sparc9"}, - {188, "tile"}, {191, "tilegx"}, {3, "386"}, {6, "486"}, {62, "x86-64"}, - {94, "xtensa"}, {0xabc7, "xtensa-old"} - }; char *map = 0; off_t phoff, shoff; @@ -83,11 +68,8 @@ static void do_elf_file(int fd) endian = 0; } - // e_machine, ala "x86", from big table above - j = elf_int(toybuf+18, 2); - for (i = 0; i