diff options
Diffstat (limited to 'toys/other/mke2fs.c')
-rw-r--r-- | toys/other/mke2fs.c | 662 |
1 files changed, 662 insertions, 0 deletions
diff --git a/toys/other/mke2fs.c b/toys/other/mke2fs.c new file mode 100644 index 00000000..47f31f2d --- /dev/null +++ b/toys/other/mke2fs.c @@ -0,0 +1,662 @@ +/* vi: set ts=4: + * + * mke2fs.c - Create an ext2 filesystem image. + * + * Copyright 2006, 2007 Rob Landley <rob@landley.net> + * + * Not in SUSv3. + +// Still to go: "E:jJ:L:m:O:" +USE_MKE2FS(NEWTOY(mke2fs, "<1>2g:Fnqm#N#i#b#", TOYFLAG_SBIN)) + +config MKE2FS + bool "mke2fs (unfinished and broken by dirtree changes)" + default n + help + usage: mke2fs [-Fnq] [-b ###] [-N|i ###] [-m ###] device + + Create an ext2 filesystem on a block device or filesystem image. + + -F Force to run on a mounted device + -n Don't write to device + -q Quiet (no output) + -b size Block size (1024, 2048, or 4096) + -N inodes Allocate this many inodes + -i bytes Allocate one inode for every XXX bytes of device + -m percent Reserve this percent of filesystem space for root user + +config MKE2FS_JOURNAL + bool "Journaling support (ext3)" + default n + depends on MKE2FS + help + usage: [-j] [-J size=###,device=XXX] + + -j Create journal (ext3) + -J Journal options + size: Number of blocks (1024-102400) + device: Specify an external journal + +config MKE2FS_GEN + bool "Generate (gene2fs)" + default n + depends on MKE2FS + help + usage: gene2fs [options] device filename + + The [options] are the same as mke2fs. + +config MKE2FS_LABEL + bool "Label support" + default n + depends on MKE2FS + help + usage: mke2fs [-L label] [-M path] [-o string] + + -L Volume label + -M Path to mount point + -o Created by + +config MKE2FS_EXTENDED + bool "Extended options" + default n + depends on MKE2FS + help + usage: mke2fs [-E stride=###] [-O option[,option]] + + -E stride= Set RAID stripe size (in blocks) + -O [opts] Specify fewer ext2 option flags (for old kernels) + All of these are on by default (as appropriate) + none Clear default options (all but journaling) + dir_index Use htree indexes for large directories + filetype Store file type info in directory entry + has_journal Set by -j + journal_dev Set by -J device=XXX + sparse_super Don't allocate huge numbers of redundant superblocks +*/ + +#include "toys.h" + +DEFINE_GLOBALS( + // Command line arguments. + long blocksize; + long bytes_per_inode; + long inodes; // Total inodes in filesystem. + long reserved_percent; // Integer precent of space to reserve for root. + char *gendir; // Where to read dirtree from. + + // Internal data. + struct dirtree *dt; // Tree of files to copy into the new filesystem. + unsigned treeblocks; // Blocks used by dt + unsigned treeinodes; // Inodes used by dt + + unsigned blocks; // Total blocks in the filesystem. + unsigned freeblocks; // Free blocks in the filesystem. + unsigned inodespg; // Inodes per group + unsigned groups; // Total number of block groups. + unsigned blockbits; // Bits per block. (Also blocks per group.) + + // For gene2fs + unsigned nextblock; // Next data block to allocate + unsigned nextgroup; // Next group we'll be allocating from + int fsfd; // File descriptor of filesystem (to output to). + + struct ext2_superblock sb; +) + +// Shortcut to our global data structure, since we use it so much. +#define TT this.mke2fs + +#define INODES_RESERVED 10 + +static uint32_t div_round_up(uint32_t a, uint32_t b) +{ + uint32_t c = a/b; + + if (a%b) c++; + return c; +} + +// Calculate data blocks plus index blocks needed to hold a file. + +static uint32_t file_blocks_used(uint64_t size, uint32_t *blocklist) +{ + uint32_t dblocks = (uint32_t)((size+(TT.blocksize-1))/TT.blocksize); + uint32_t idx=TT.blocksize/4, iblocks=0, diblocks=0, tiblocks=0; + + // Fill out index blocks in inode. + + if (blocklist) { + int i; + + // Direct index blocks + for (i=0; i<13 && i<dblocks; i++) blocklist[i] = i; + // Singly indirect index blocks + if (dblocks > 13+idx) blocklist[13] = 13+idx; + // Doubly indirect index blocks + idx = 13 + idx + (idx*idx); + if (dblocks > idx) blocklist[14] = idx; + + return 0; + } + + // Account for direct, singly, doubly, and triply indirect index blocks + + if (dblocks > 12) { + iblocks = ((dblocks-13)/idx)+1; + if (iblocks > 1) { + diblocks = ((iblocks-2)/idx)+1; + if (diblocks > 1) + tiblocks = ((diblocks-2)/idx)+1; + } + } + + return dblocks + iblocks + diblocks + tiblocks; +} + +// Use the parent pointer to iterate through the tree non-recursively. +static struct dirtree *treenext(struct dirtree *this) +{ + while (this && !this->next) this = this->parent; + if (this) this = this->next; + + return this; +} + +// Recursively calculate the number of blocks used by each inode in the tree. +// Returns blocks used by this directory, assigns bytes used to *size. +// Writes total block count to TT.treeblocks and inode count to TT.treeinodes. + +static long check_treesize(struct dirtree *that, off_t *size) +{ + long blocks; + + while (that) { + *size += sizeof(struct ext2_dentry) + strlen(that->name); + + if (that->child) + that->st.st_blocks = check_treesize(that->child, &that->st.st_size); + else if (S_ISREG(that->st.st_mode)) { + that->st.st_blocks = file_blocks_used(that->st.st_size, 0); + TT.treeblocks += that->st.st_blocks; + } + that = that->next; + } + TT.treeblocks += blocks = file_blocks_used(*size, 0); + TT.treeinodes++; + + return blocks; +} + +// Calculate inode numbers and link counts. +// +// To do this right I need to copy the tree and sort it, but here's a really +// ugly n^2 way of dealing with the problem that doesn't scale well to large +// numbers of files (> 100,000) but can be done in very little code. +// This rewrites inode numbers to their final values, allocating depth first. + +static void check_treelinks(struct dirtree *tree) +{ + struct dirtree *current=tree, *that; + long inode = INODES_RESERVED; + + while (current) { + ++inode; + // Since we can't hardlink to directories, we know their link count. + if (S_ISDIR(current->st.st_mode)) current->st.st_nlink = 2; + else { + dev_t new = current->st.st_dev; + + if (!new) continue; + + // Look for other copies of current node + current->st.st_nlink = 0; + for (that = tree; that; that = treenext(that)) { + if (current->st.st_ino == that->st.st_ino && + current->st.st_dev == that->st.st_dev) + { + current->st.st_nlink++; + current->st.st_ino = inode; + } + } + } + current->st.st_ino = inode; + current = treenext(current); + } +} + +// According to http://www.opengroup.org/onlinepubs/9629399/apdxa.htm +// we should generate a uuid structure by reading a clock with 100 nanosecond +// precision, normalizing it to the start of the gregorian calendar in 1582, +// and looking up our eth0 mac address. +// +// On the other hand, we have 128 bits to come up with a unique identifier, of +// which 6 have a defined value. /dev/urandom it is. + +static void create_uuid(char *uuid) +{ + // Read 128 random bits + int fd = xopen("/dev/urandom", O_RDONLY); + xreadall(fd, uuid, 16); + close(fd); + + // Claim to be a DCE format UUID. + uuid[6] = (uuid[6] & 0x0F) | 0x40; + uuid[8] = (uuid[8] & 0x3F) | 0x80; + + // rfc2518 section 6.4.1 suggests if we're not using a macaddr, we should + // set bit 1 of the node ID, which is the mac multicast bit. This means we + // should never collide with anybody actually using a macaddr. + uuid[11] = uuid[11] | 128; +} + +// Calculate inodes per group from total inodes. +static uint32_t get_inodespg(uint32_t inodes) +{ + uint32_t temp; + + // Round up to fill complete inode blocks. + temp = (inodes + TT.groups - 1) / TT.groups; + inodes = TT.blocksize/sizeof(struct ext2_inode); + return ((temp + inodes - 1)/inodes)*inodes; +} + +// Fill out superblock and TT structures. + +static void init_superblock(struct ext2_superblock *sb) +{ + uint32_t temp; + + // Set log_block_size and log_frag_size. + + for (temp = 0; temp < 4; temp++) if (TT.blocksize == 1024<<temp) break; + if (temp==4) error_exit("bad blocksize"); + sb->log_block_size = sb->log_frag_size = SWAP_LE32(temp); + + // Fill out blocks_count, r_blocks_count, first_data_block + + sb->blocks_count = SWAP_LE32(TT.blocks); + sb->free_blocks_count = SWAP_LE32(TT.freeblocks); + temp = (TT.blocks * (uint64_t)TT.reserved_percent) / 100; + sb->r_blocks_count = SWAP_LE32(temp); + + sb->first_data_block = SWAP_LE32(TT.blocksize == 1024 ? 1 : 0); + + // Set blocks_per_group and frags_per_group, which is the size of an + // allocation bitmap that fits in one block (I.E. how many bits per block)? + + sb->blocks_per_group = sb->frags_per_group = SWAP_LE32(TT.blockbits); + + // Set inodes_per_group and total inodes_count + sb->inodes_per_group = SWAP_LE32(TT.inodespg); + sb->inodes_count = SWAP_LE32(TT.inodespg * TT.groups); + + // Determine free inodes. + temp = TT.inodespg*TT.groups - INODES_RESERVED; + if (temp < TT.treeinodes) error_exit("Not enough inodes.\n"); + sb->free_inodes_count = SWAP_LE32(temp - TT.treeinodes); + + // Fill out the rest of the superblock. + sb->max_mnt_count=0xFFFF; + sb->wtime = sb->lastcheck = sb->mkfs_time = SWAP_LE32(time(NULL)); + sb->magic = SWAP_LE32(0xEF53); + sb->state = sb->errors = SWAP_LE16(1); + + sb->rev_level = SWAP_LE32(1); + sb->first_ino = SWAP_LE32(INODES_RESERVED+1); + sb->inode_size = SWAP_LE16(sizeof(struct ext2_inode)); + sb->feature_incompat = SWAP_LE32(EXT2_FEATURE_INCOMPAT_FILETYPE); + sb->feature_ro_compat = SWAP_LE32(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER); + + create_uuid(sb->uuid); + + // TODO If we're called as mke3fs or mkfs.ext3, do a journal. + + //if (strchr(toys.which->name,'3')) + // sb->feature_compat |= SWAP_LE32(EXT3_FEATURE_COMPAT_HAS_JOURNAL); +} + +// Does this group contain a superblock backup (and group descriptor table)? +static int is_sb_group(uint32_t group) +{ + int i; + + // Superblock backups are on groups 0, 1, and powers of 3, 5, and 7. + if(!group || group==1) return 1; + for (i=3; i<9; i+=2) { + int j = i; + while (j<group) j*=i; + if (j==group) return 1; + } + return 0; +} + + +// Number of blocks used in group by optional superblock/group list backup. +static int group_superblock_overhead(uint32_t group) +{ + int used; + + if (!is_sb_group(group)) return 0; + + // How many blocks does the group descriptor table take up? + used = TT.groups * sizeof(struct ext2_group); + used += TT.blocksize - 1; + used /= TT.blocksize; + // Plus the superblock itself. + used++; + // And a corner case. + if (!group && TT.blocksize == 1024) used++; + + return used; +} + +// Number of blocks used in group to store superblock/group/inode list +static int group_overhead(uint32_t group) +{ + // Return superblock backup overhead (if any), plus block/inode + // allocation bitmaps, plus inode tables. + return group_superblock_overhead(group) + 2 + get_inodespg(TT.inodespg) + / (TT.blocksize/sizeof(struct ext2_inode)); +} + +// In bitmap "array" set "len" bits starting at position "start" (from 0). +static void bits_set(char *array, int start, int len) +{ + while(len) { + if ((start&7) || len<8) { + array[start/8]|=(1<<(start&7)); + start++; + len--; + } else { + array[start/8]=255; + start+=8; + len-=8; + } + } +} + +// Seek past len bytes (to maintain sparse file), or write zeroes if output +// not seekable +static void put_zeroes(int len) +{ + if(-1 == lseek(TT.fsfd, len, SEEK_SET)) { + memset(toybuf, 0, sizeof(toybuf)); + while (len) { + int out = len > sizeof(toybuf) ? sizeof(toybuf) : len; + xwrite(TT.fsfd, toybuf, out); + len -= out; + } + } +} + +// Fill out an inode structure from struct stat info in dirtree. +static void fill_inode(struct ext2_inode *in, struct dirtree *that) +{ + uint32_t fbu[15]; + int temp; + + file_blocks_used(that->st.st_size, fbu); + + // If that inode needs data blocks allocated to it. + if (that->st.st_size) { + int i, group = TT.nextblock/TT.blockbits; + + // TODO: teach this about indirect blocks. + for (i=0; i<15; i++) { + // If we just jumped into a new group, skip group overhead blocks. + while (group >= TT.nextgroup) + TT.nextblock += group_overhead(TT.nextgroup++); + } + } + // TODO : S_ISREG/DIR/CHR/BLK/FIFO/LNK/SOCK(m) + in->mode = SWAP_LE32(that->st.st_mode); + + in->uid = SWAP_LE16(that->st.st_uid & 0xFFFF); + in->uid_high = SWAP_LE16(that->st.st_uid >> 16); + in->gid = SWAP_LE16(that->st.st_gid & 0xFFFF); + in->gid_high = SWAP_LE16(that->st.st_gid >> 16); + in->size = SWAP_LE32(that->st.st_size & 0xFFFFFFFF); + + // Contortions to make the compiler not generate a warning for x>>32 + // when x is 32 bits. The optimizer should clean this up. + if (sizeof(that->st.st_size) > 4) temp = 32; + else temp = 0; + if (temp) in->dir_acl = SWAP_LE32(that->st.st_size >> temp); + + in->atime = SWAP_LE32(that->st.st_atime); + in->ctime = SWAP_LE32(that->st.st_ctime); + in->mtime = SWAP_LE32(that->st.st_mtime); + + in->links_count = SWAP_LE16(that->st.st_nlink); + in->blocks = SWAP_LE32(that->st.st_blocks); + // in->faddr +} + +// Works like an archiver. +// The first argument is the name of the file to create. If it already +// exists, that size will be used. + +void mke2fs_main(void) +{ + int i, temp; + off_t length; + uint32_t usedblocks, usedinodes, dtiblk, dtbblk; + struct dirtree *dti, *dtb; + + // Handle command line arguments. + + if (toys.optargs[1]) { + sscanf(toys.optargs[1], "%u", &TT.blocks); + temp = O_RDWR|O_CREAT; + } else temp = O_RDWR; + if (!TT.reserved_percent) TT.reserved_percent = 5; + + // TODO: Check if filesystem is mounted here + + // For mke?fs, open file. For gene?fs, create file. + TT.fsfd = xcreate(*toys.optargs, temp, 0777); + + // Determine appropriate block size and block count from file length. + // (If no length, default to 4k. They can override it on the cmdline.) + + length = fdlength(TT.fsfd); + if (!TT.blocksize) TT.blocksize = (length && length < 1<<29) ? 1024 : 4096; + TT.blockbits = 8*TT.blocksize; + if (!TT.blocks) TT.blocks = length/TT.blocksize; + + // Collect gene2fs list or lost+found, calculate requirements. + + if (TT.gendir) { + strncpy(toybuf, TT.gendir, sizeof(toybuf)); + dti = dirtree_read(toybuf, NULL, NULL); + } else { + dti = xzalloc(sizeof(struct dirtree)+11); + strcpy(dti->name, "lost+found"); + dti->st.st_mode = S_IFDIR|0755; + dti->st.st_ctime = dti->st.st_mtime = time(NULL); + } + + // Add root directory inode. This is iterated through for when finding + // blocks, but not when finding inodes. The tree's parent pointers don't + // point back into this. + + dtb = xzalloc(sizeof(struct dirtree)+1); + dtb->st.st_mode = S_IFDIR|0755; + dtb->st.st_ctime = dtb->st.st_mtime = time(NULL); + dtb->child = dti; + + // Figure out how much space is used by preset files + length = check_treesize(dtb, &(dtb->st.st_size)); + check_treelinks(dtb); + + // Figure out how many total inodes we need. + + if (!TT.inodes) { + if (!TT.bytes_per_inode) TT.bytes_per_inode = 8192; + TT.inodes = (TT.blocks * (uint64_t)TT.blocksize) / TT.bytes_per_inode; + } + + // If we're generating a filesystem and have no idea how many blocks it + // needs, start with a minimal guess, find the overhead of that many + // groups, and loop until this is enough groups to store this many blocks. + if (!TT.blocks) TT.groups = (TT.treeblocks/TT.blockbits)+1; + else TT.groups = div_round_up(TT.blocks, TT.blockbits); + + for (;;) { + temp = TT.treeblocks; + + for (i = 0; i<TT.groups; i++) temp += group_overhead(i); + + if (TT.blocks) { + if (TT.blocks < temp) error_exit("Not enough space.\n"); + break; + } + if (temp <= TT.groups * TT.blockbits) { + TT.blocks = temp; + break; + } + TT.groups++; + } + TT.freeblocks = TT.blocks - temp; + + // Now we know all the TT data, initialize superblock structure. + + init_superblock(&TT.sb); + + // Start writing. Skip the first 1k to avoid the boot sector (if any). + put_zeroes(1024); + + // Loop through block groups, write out each one. + dtiblk = dtbblk = usedblocks = usedinodes = 0; + for (i=0; i<TT.groups; i++) { + struct ext2_inode *in = (struct ext2_inode *)toybuf; + uint32_t start, itable, used, end; + int j, slot; + + // Where does this group end? + end = TT.blockbits; + if ((i+1)*TT.blockbits > TT.blocks) end = TT.blocks & (TT.blockbits-1); + + // Blocks used by inode table + itable = (TT.inodespg*sizeof(struct ext2_inode))/TT.blocksize; + + // If a superblock goes here, write it out. + start = group_superblock_overhead(i); + if (start) { + struct ext2_group *bg = (struct ext2_group *)toybuf; + int treeblocks = TT.treeblocks, treeinodes = TT.treeinodes; + + TT.sb.block_group_nr = SWAP_LE16(i); + + // Write superblock and pad it up to block size + xwrite(TT.fsfd, &TT.sb, sizeof(struct ext2_superblock)); + temp = TT.blocksize - sizeof(struct ext2_superblock); + if (!i && TT.blocksize > 1024) temp -= 1024; + memset(toybuf, 0, TT.blocksize); + xwrite(TT.fsfd, toybuf, temp); + + // Loop through groups to write group descriptor table. + for(j=0; j<TT.groups; j++) { + + // Figure out what sector this group starts in. + used = group_superblock_overhead(j); + + // Find next array slot in this block (flush block if full). + slot = j % (TT.blocksize/sizeof(struct ext2_group)); + if (!slot) { + if (j) xwrite(TT.fsfd, bg, TT.blocksize); + memset(bg, 0, TT.blocksize); + } + + // How many free inodes in this group? + temp = TT.inodespg; + if (!i) temp -= INODES_RESERVED; + if (temp > treeinodes) { + treeinodes -= temp; + temp = 0; + } else { + temp -= treeinodes; + treeinodes = 0; + } + bg[slot].free_inodes_count = SWAP_LE16(temp); + + // How many free blocks in this group? + temp = TT.inodespg/(TT.blocksize/sizeof(struct ext2_inode)) + 2; + temp = end-used-temp; + if (temp > treeblocks) { + treeblocks -= temp; + temp = 0; + } else { + temp -= treeblocks; + treeblocks = 0; + } + bg[slot].free_blocks_count = SWAP_LE32(temp); + + // Fill out rest of group structure + used += j*TT.blockbits; + bg[slot].block_bitmap = SWAP_LE32(used++); + bg[slot].inode_bitmap = SWAP_LE32(used++); + bg[slot].inode_table = SWAP_LE32(used); + bg[slot].used_dirs_count = 0; // (TODO) + } + xwrite(TT.fsfd, bg, TT.blocksize); + } + + // Now write out stuff that every block group has. + + // Write block usage bitmap + + start += 2 + itable; + memset(toybuf, 0, TT.blocksize); + bits_set(toybuf, 0, start); + bits_set(toybuf, end, TT.blockbits-end); + temp = TT.treeblocks - usedblocks; + if (temp) { + if (end-start > temp) temp = end-start; + bits_set(toybuf, start, temp); + } + xwrite(TT.fsfd, toybuf, TT.blocksize); + + // Write inode bitmap + memset(toybuf, 0, TT.blocksize); + j = 0; + if (!i) bits_set(toybuf, 0, j = INODES_RESERVED); + bits_set(toybuf, TT.inodespg, slot = TT.blockbits-TT.inodespg); + temp = TT.treeinodes - usedinodes; + if (temp) { + if (slot-j > temp) temp = slot-j; + bits_set(toybuf, j, temp); + } + xwrite(TT.fsfd, toybuf, TT.blocksize); + + // Write inode table for this group (TODO) + for (j = 0; j<TT.inodespg; j++) { + slot = j % (TT.blocksize/sizeof(struct ext2_inode)); + if (!slot) { + if (j) xwrite(TT.fsfd, in, TT.blocksize); + memset(in, 0, TT.blocksize); + } + if (!i && j<INODES_RESERVED) { + // Write root inode + if (j == 2) fill_inode(in+slot, dtb); + } else if (dti) { + fill_inode(in+slot, dti); + dti = treenext(dti); + } + } + xwrite(TT.fsfd, in, TT.blocksize); + + while (dtb) { + // TODO write index data block + // TODO write root directory data block + // TODO write directory data block + // TODO write file data block + put_zeroes(TT.blocksize); + start++; + if (start == end) break; + } + // Write data blocks (TODO) + put_zeroes((end-start) * TT.blocksize); + } +} |