From 8d95074b7d034188af8542aaea0306d3670d71be Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Mon, 7 Mar 2016 16:02:47 -0600 Subject: Cleanup pass on the dirtree infrastructure, in preparation for making rm -r handle infinite depth. Fix docs, tweak dirtree_handle_callback() semantics, remove dirtree_start() and don't export dirtree_handle_callback(), instead offer dirtree_flagread(). (dirtree_read() is a wrapper around dirtree_flagread passing 0 for flags.) --- lib/dirtree.c | 39 ++++++++++++------------- lib/lib.h | 7 +++-- toys/pending/diff.c | 2 +- toys/pending/tar.c | 2 +- toys/posix/chgrp.c | 4 +-- toys/posix/cp.c | 7 ++--- toys/posix/du.c | 2 +- toys/posix/find.c | 2 +- toys/posix/ls.c | 11 ++++--- www/code.html | 83 ++++++++++++++++++++++++++++++++--------------------- 10 files changed, 90 insertions(+), 69 deletions(-) diff --git a/lib/dirtree.c b/lib/dirtree.c index 8b9f2993..cc1ab90c 100644 --- a/lib/dirtree.c +++ b/lib/dirtree.c @@ -17,7 +17,7 @@ static int notdotdot(char *name) int dirtree_notdotdot(struct dirtree *catch) { // Should we skip "." and ".."? - return notdotdot(catch->name) ? DIRTREE_SAVE|DIRTREE_RECURSE : 0; + return notdotdot(catch->name)*(DIRTREE_SAVE|DIRTREE_RECURSE); } // Create a dirtree node from a path, with stat and symlink info. @@ -96,21 +96,18 @@ int dirtree_parentfd(struct dirtree *node) return node->parent ? node->parent->dirfd : AT_FDCWD; } -// Handle callback for a node in the tree. Returns saved node(s) or NULL. -// -// By default, allocates a tree of struct dirtree, not following symlinks -// If callback==NULL, or callback always returns 0, allocate tree of struct -// dirtree and return root of tree. Otherwise call callback(node) on each -// hit, free structures after use, and return NULL. -// +// Handle callback for a node in the tree. Returns saved node(s) if +// callback returns DIRTREE_SAVE, otherwise frees consumed nodes and +// returns NULL. If !callback return top node unchanged. +// If !new return DIRTREE_ABORTVAL struct dirtree *dirtree_handle_callback(struct dirtree *new, int (*callback)(struct dirtree *node)) { int flags; - if (!new) return 0; - if (!callback) callback = dirtree_notdotdot; + if (!new) return DIRTREE_ABORTVAL; + if (!callback) return new; flags = callback(new); if (S_ISDIR(new->st.st_mode)) { @@ -176,19 +173,21 @@ int dirtree_recurse(struct dirtree *node, return flags; } -// Create dirtree root -struct dirtree *dirtree_start(char *name, int symfollow) +// Create dirtree from path, using callback to filter nodes. If !callback +// return just the top node. Use dirtree_notdotdot callback to allocate a +// tree of struct dirtree nodes and return pointer to root node for later +// processing. +// Returns DIRTREE_ABORTVAL if path didn't exist (use DIRTREE_SHUTUP to handle +// error message yourself). + +struct dirtree *dirtree_flagread(char *path, int flags, + int (*callback)(struct dirtree *node)) { - return dirtree_add_node(0, name, DIRTREE_SYMFOLLOW*!!symfollow); + return dirtree_handle_callback(dirtree_add_node(0, path, flags), callback); } -// Create dirtree from path, using callback to filter nodes. -// If callback == NULL allocate a tree of struct dirtree nodes and return -// pointer to root node. - +// Common case struct dirtree *dirtree_read(char *path, int (*callback)(struct dirtree *node)) { - struct dirtree *root = dirtree_start(path, 0); - - return root ? dirtree_handle_callback(root, callback) : DIRTREE_ABORTVAL; + return dirtree_flagread(path, 0, callback); } diff --git a/lib/lib.h b/lib/lib.h index c8cff6ef..917be0c1 100644 --- a/lib/lib.h +++ b/lib/lib.h @@ -62,6 +62,8 @@ void get_optflags(void); #define DIRTREE_SYMFOLLOW 8 // Don't warn about failure to stat #define DIRTREE_SHUTUP 16 +// Breadth first traversal, conserves filehandles at the expense of memory +#define DIRTREE_BREADTH 32 // Don't look at any more files in this directory. #define DIRTREE_ABORT 256 @@ -77,15 +79,14 @@ struct dirtree { char name[]; }; -struct dirtree *dirtree_start(char *name, int symfollow); struct dirtree *dirtree_add_node(struct dirtree *p, char *name, int flags); char *dirtree_path(struct dirtree *node, int *plen); int dirtree_notdotdot(struct dirtree *catch); int dirtree_parentfd(struct dirtree *node); -struct dirtree *dirtree_handle_callback(struct dirtree *new, - int (*callback)(struct dirtree *node)); int dirtree_recurse(struct dirtree *node, int (*callback)(struct dirtree *node), int symfollow); +struct dirtree *dirtree_flagread(char *path, int flags, + int (*callback)(struct dirtree *node)); struct dirtree *dirtree_read(char *path, int (*callback)(struct dirtree *node)); // help.c diff --git a/toys/pending/diff.c b/toys/pending/diff.c index 80238614..da6c13a0 100644 --- a/toys/pending/diff.c +++ b/toys/pending/diff.c @@ -798,7 +798,7 @@ void diff_main(void) if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode)) { for (j = 0; j < 2; j++) { memset(&dir[j], 0, sizeof(dir)); - dirtree_handle_callback(dirtree_start(files[j], 1), list_dir); + dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir); dir[j].nr_elm = TT.size; //size updated in list_dir qsort(&(dir[j].list[1]), (TT.size - 1), sizeof(char*), cmp); diff --git a/toys/pending/tar.c b/toys/pending/tar.c index c5043087..384199ea 100644 --- a/toys/pending/tar.c +++ b/toys/pending/tar.c @@ -788,7 +788,7 @@ void tar_main(void) for (tmp = TT.inc; tmp; tmp = tmp->next) { TT.handle = tar_hdl; //recurse thru dir and add files to archive - dirtree_handle_callback(dirtree_start(tmp->arg, toys.optflags & FLAG_h), + dirtree_flagread(tmp->arg, DIRTREE_SYMFOLLOW*!!(toys.optflags&FLAG_h), add_to_tar); } memset(toybuf, 0, 1024); diff --git a/toys/posix/chgrp.c b/toys/posix/chgrp.c index 6b95c6ad..e0690c93 100644 --- a/toys/posix/chgrp.c +++ b/toys/posix/chgrp.c @@ -94,8 +94,8 @@ void chgrp_main(void) TT.group = xgetgrnamid(TT.group_name)->gr_gid; for (s=toys.optargs+1; *s; s++) - dirtree_handle_callback(dirtree_start(*s, toys.optflags&(FLAG_H|FLAG_L)), - do_chgrp);; + dirtree_flagread(*s, DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), + do_chgrp); if (CFG_TOYBOX_FREE && ischown) free(own); } diff --git a/toys/posix/cp.c b/toys/posix/cp.c index cb7e6e3b..0e6a2efa 100644 --- a/toys/posix/cp.c +++ b/toys/posix/cp.c @@ -430,12 +430,11 @@ void cp_main(void) if (rc) rc = rename(src, TT.destname); } - // Skip nonexistent sources + // Copy if we didn't mv, skipping nonexistent sources if (rc) { - if (errno!=EXDEV || - !(new = dirtree_start(src, toys.optflags&(FLAG_H|FLAG_L)))) + if (errno!=EXDEV || dirtree_flagread(src, DIRTREE_SHUTUP+ + DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), TT.callback)) perror_msg("bad '%s'", src); - else dirtree_handle_callback(new, TT.callback); } if (destdir) free(TT.destname); } diff --git a/toys/posix/du.c b/toys/posix/du.c index 77c7b6e2..2797b3f4 100644 --- a/toys/posix/du.c +++ b/toys/posix/du.c @@ -153,7 +153,7 @@ void du_main(void) // Loop over command line arguments, recursing through children for (args = toys.optc ? toys.optargs : noargs; *args; args++) - dirtree_handle_callback(dirtree_start(*args, toys.optflags&(FLAG_H|FLAG_L)), + dirtree_flagread(*args, DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), do_du); if (toys.optflags & FLAG_c) print(TT.total*512, 0); diff --git a/toys/posix/find.c b/toys/posix/find.c index a9c35f41..303aada2 100644 --- a/toys/posix/find.c +++ b/toys/posix/find.c @@ -556,7 +556,7 @@ void find_main(void) // Loop through paths for (i = 0; i < len; i++) - dirtree_handle_callback(dirtree_start(ss[i], toys.optflags&(FLAG_H|FLAG_L)), + dirtree_flagread(ss[i], DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), do_find); execdir(0, 1); diff --git a/toys/posix/ls.c b/toys/posix/ls.c index 4dcbe888..799631b1 100644 --- a/toys/posix/ls.c +++ b/toys/posix/ls.c @@ -540,12 +540,15 @@ void ls_main(void) if (toys.optflags & FLAG_d) toys.optflags &= ~FLAG_R; // Iterate through command line arguments, collecting directories and files. - // Non-absolute paths are relative to current directory. - TT.files = dirtree_start(0, 0); + // Non-absolute paths are relative to current directory. Top of tree is + // a dummy node to collect command line arguments into pseudo-directory. + TT.files = dirtree_add_node(0, 0, 0); TT.files->dirfd = AT_FDCWD; for (s = *toys.optargs ? toys.optargs : noargs; *s; s++) { - dt = dirtree_start(*s, !(toys.optflags&(FLAG_l|FLAG_d|FLAG_F)) || - (toys.optflags&(FLAG_L|FLAG_H))); + int sym = !(toys.optflags&(FLAG_l|FLAG_d|FLAG_F)) + || (toys.optflags&(FLAG_L|FLAG_H)); + + dt = dirtree_add_node(0, *s, DIRTREE_SYMFOLLOW*sym); // note: double_list->prev temporarirly goes in dirtree->parent if (dt) dlist_add_nomalloc((void *)&TT.files->child, (void *)dt); diff --git a/www/code.html b/www/code.html index b1c6d3f9..7e15e181 100644 --- a/www/code.html +++ b/www/code.html @@ -1198,16 +1198,32 @@ of functions.

These functions do not call chdir() or rely on PATH_MAX. Instead they use openat() and friends, using one filehandle per directory level to -recurseinto subdirectories. (I.E. they can descend 1000 directories deep +recurse into subdirectories. (I.E. they can descend 1000 directories deep if setrlimit(RLIMIT_NOFILE) allows enough open filehandles, and the default in /proc/self/limits is generally 1024.)

+

There are two main ways to use dirtree: 1) assemble a tree of nodes +representing a snapshot of directory state and traverse them using the +->next and ->child pointers, or 2) traverse the tree calling a callback +function on each entry, and freeing its node afterwards. (You can also +combine the two, using the callback as a filter to determine which nodes +to keep.)

+

The basic dirtree functions are:

-

The dirtree_read() function takes two arguments, a starting path for -the root of the tree, and a callback function. The callback takes a -struct dirtree * (from lib/lib.h) as its argument. If the callback is -NULL, the traversal uses a default callback (dirtree_notdotdot()) which -recursively assembles a tree of struct dirtree nodes for all files under -this directory and subdirectories (filtering out "." and ".." entries), -after which dirtree_read() returns the pointer to the root node of this -snapshot tree.

+

The dirtree_read() function is the standard way to start +directory traversal. It takes two arguments: a starting path for +the root of the tree, and a callback function. The callback() is called +on each directory entry, its argument is a fully populated +struct dirtree * (from lib/lib.h) describing the node, and its +return value tells the dirtree infrastructure what to do next.

-

Otherwise the callback() is called on each entry in the directory, -with struct dirtree * as its argument. This includes the initial -node created by dirtree_read() at the top of the tree.

+

(There's also a three argument version, +dirtree_flagread(char *path, int flags, int (*callback)(struct +dirtree node)), which lets you apply flags like DIRTREE_SYMFOLLOW and +DIRTREE_SHUTUP to reading the top node, but this only affects the top node. +Child nodes use the flags returned by callback().

struct dirtree

@@ -1237,12 +1253,13 @@ node created by dirtree_read() at the top of the tree.

st entries describing a file, plus a char *symlink which is NULL for non-symlinks.

-

During a callback function, the int data field of directory nodes -contains a dirfd (for use with the openat() family of functions). This is -generally used by calling dirtree_parentfd() on the callback's node argument. -For symlinks, data contains the length of the symlink string. On the second -callback from DIRTREE_COMEAGAIN (depth-first traversal) data = -1 for -all nodes (that's how you can tell it's the second callback).

+

During a callback function, the int dirfd field of directory nodes +contains a directory file descriptor (for use with the openat() family of +functions). This isn't usually used directly, intstead call dirtree_parentfd() +on the callback's node argument. The char again field is 0 for the +first callback on a node, and 1 on the second callback (triggered by returning +DIRTREE_COMEAGAIN on a directory, made after all children have been processed). +

Users of this code may put anything they like into the long extra field. For example, "cp" and "mv" use this to store a dirfd for the destination @@ -1266,15 +1283,17 @@ return DIRTREE_ABORT from parent callbacks too.)

  • DIRTREE_RECURSE - Examine directory contents. Ignored for non-directory entries. The remaining flags only take effect when recursing into the children of a directory.

  • -
  • DIRTREE_COMEAGAIN - Call the callback a second time after -examining all directory contents, allowing depth-first traversal. -On the second call, dirtree->data = -1.

  • +
  • DIRTREE_COMEAGAIN - Call the callback on this node a second time +after examining all directory contents, allowing depth-first traversal. +On the second call, dirtree->again is nonzero.

  • DIRTREE_SYMFOLLOW - follow symlinks when populating children's struct stat st (by feeding a nonzero value to the symfollow argument of dirtree_add_node()), which means DIRTREE_RECURSE treats symlinks to directories as directories. (Avoiding infinite recursion is the callback's problem: the non-NULL dirtree->symlink can still distinguish between -them.)

  • +them. The "find" command follows ->parent up the tree to the root node +each time, checking to make sure that stat's dev and inode pair don't +match any ancestors.)

    Each struct dirtree contains three pointers (next, parent, and child) @@ -1299,15 +1318,15 @@ single malloc() (even char *symlink points to memory at the end of the node), so llist_free() works but its callback must descend into child nodes (freeing a tree, not just a linked list), plus whatever the user stored in extra.

    -

    The dirtree_read() function is a simple wrapper, calling dirtree_add_node() +

    The dirtree_flagread() function is a simple wrapper, calling dirtree_add_node() to create a root node relative to the current directory, then calling -handle_callback() on that node (which recurses as instructed by the callback -return flags). Some commands (such as chgrp) bypass this wrapper, for example -to control whether or not to follow symlinks to the root node; symlinks +dirtree_handle_callback() on that node (which recurses as instructed by the callback +return flags). The flags argument primarily lets you +control whether or not to follow symlinks to the root node; symlinks listed on the command line are often treated differently than symlinks -encountered during recursive directory traversal). +encountered during recursive directory traversal. -

    The ls command not only bypasses the wrapper, but never returns +

    The ls command not only bypasses this wrapper, but never returns DIRTREE_RECURSE from the callback, instead calling dirtree_recurse() manually from elsewhere in the program. This gives ls -lR manual control of traversal order, which is neither depth first nor breadth first but -- cgit v1.2.3