diff options
author | Rob Landley <rob@landley.net> | 2016-03-07 16:02:47 -0600 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2016-03-07 16:02:47 -0600 |
commit | 8d95074b7d034188af8542aaea0306d3670d71be (patch) | |
tree | f2cc98fa718becc2b6379dc7c724ea35f266cd6a | |
parent | 2a26ba451a605c185242de50e1d91eeac0a2430e (diff) | |
download | toybox-8d95074b7d034188af8542aaea0306d3670d71be.tar.gz |
Cleanup pass on the dirtree infrastructure, in preparation for making rm -r
handle infinite depth. Fix docs, tweak dirtree_handle_callback() semantics,
remove dirtree_start() and don't export dirtree_handle_callback(), instead
offer dirtree_flagread(). (dirtree_read() is a wrapper around dirtree_flagread
passing 0 for flags.)
-rw-r--r-- | lib/dirtree.c | 39 | ||||
-rw-r--r-- | lib/lib.h | 7 | ||||
-rw-r--r-- | toys/pending/diff.c | 2 | ||||
-rw-r--r-- | toys/pending/tar.c | 2 | ||||
-rw-r--r-- | toys/posix/chgrp.c | 4 | ||||
-rw-r--r-- | toys/posix/cp.c | 7 | ||||
-rw-r--r-- | toys/posix/du.c | 2 | ||||
-rw-r--r-- | toys/posix/find.c | 2 | ||||
-rw-r--r-- | toys/posix/ls.c | 11 | ||||
-rw-r--r-- | www/code.html | 83 |
10 files changed, 90 insertions, 69 deletions
diff --git a/lib/dirtree.c b/lib/dirtree.c index 8b9f2993..cc1ab90c 100644 --- a/lib/dirtree.c +++ b/lib/dirtree.c @@ -17,7 +17,7 @@ static int notdotdot(char *name) int dirtree_notdotdot(struct dirtree *catch) { // Should we skip "." and ".."? - return notdotdot(catch->name) ? DIRTREE_SAVE|DIRTREE_RECURSE : 0; + return notdotdot(catch->name)*(DIRTREE_SAVE|DIRTREE_RECURSE); } // Create a dirtree node from a path, with stat and symlink info. @@ -96,21 +96,18 @@ int dirtree_parentfd(struct dirtree *node) return node->parent ? node->parent->dirfd : AT_FDCWD; } -// Handle callback for a node in the tree. Returns saved node(s) or NULL. -// -// By default, allocates a tree of struct dirtree, not following symlinks -// If callback==NULL, or callback always returns 0, allocate tree of struct -// dirtree and return root of tree. Otherwise call callback(node) on each -// hit, free structures after use, and return NULL. -// +// Handle callback for a node in the tree. Returns saved node(s) if +// callback returns DIRTREE_SAVE, otherwise frees consumed nodes and +// returns NULL. If !callback return top node unchanged. +// If !new return DIRTREE_ABORTVAL struct dirtree *dirtree_handle_callback(struct dirtree *new, int (*callback)(struct dirtree *node)) { int flags; - if (!new) return 0; - if (!callback) callback = dirtree_notdotdot; + if (!new) return DIRTREE_ABORTVAL; + if (!callback) return new; flags = callback(new); if (S_ISDIR(new->st.st_mode)) { @@ -176,19 +173,21 @@ int dirtree_recurse(struct dirtree *node, return flags; } -// Create dirtree root -struct dirtree *dirtree_start(char *name, int symfollow) +// Create dirtree from path, using callback to filter nodes. If !callback +// return just the top node. Use dirtree_notdotdot callback to allocate a +// tree of struct dirtree nodes and return pointer to root node for later +// processing. +// Returns DIRTREE_ABORTVAL if path didn't exist (use DIRTREE_SHUTUP to handle +// error message yourself). + +struct dirtree *dirtree_flagread(char *path, int flags, + int (*callback)(struct dirtree *node)) { - return dirtree_add_node(0, name, DIRTREE_SYMFOLLOW*!!symfollow); + return dirtree_handle_callback(dirtree_add_node(0, path, flags), callback); } -// Create dirtree from path, using callback to filter nodes. -// If callback == NULL allocate a tree of struct dirtree nodes and return -// pointer to root node. - +// Common case struct dirtree *dirtree_read(char *path, int (*callback)(struct dirtree *node)) { - struct dirtree *root = dirtree_start(path, 0); - - return root ? dirtree_handle_callback(root, callback) : DIRTREE_ABORTVAL; + return dirtree_flagread(path, 0, callback); } @@ -62,6 +62,8 @@ void get_optflags(void); #define DIRTREE_SYMFOLLOW 8 // Don't warn about failure to stat #define DIRTREE_SHUTUP 16 +// Breadth first traversal, conserves filehandles at the expense of memory +#define DIRTREE_BREADTH 32 // Don't look at any more files in this directory. #define DIRTREE_ABORT 256 @@ -77,15 +79,14 @@ struct dirtree { char name[]; }; -struct dirtree *dirtree_start(char *name, int symfollow); struct dirtree *dirtree_add_node(struct dirtree *p, char *name, int flags); char *dirtree_path(struct dirtree *node, int *plen); int dirtree_notdotdot(struct dirtree *catch); int dirtree_parentfd(struct dirtree *node); -struct dirtree *dirtree_handle_callback(struct dirtree *new, - int (*callback)(struct dirtree *node)); int dirtree_recurse(struct dirtree *node, int (*callback)(struct dirtree *node), int symfollow); +struct dirtree *dirtree_flagread(char *path, int flags, + int (*callback)(struct dirtree *node)); struct dirtree *dirtree_read(char *path, int (*callback)(struct dirtree *node)); // help.c diff --git a/toys/pending/diff.c b/toys/pending/diff.c index 80238614..da6c13a0 100644 --- a/toys/pending/diff.c +++ b/toys/pending/diff.c @@ -798,7 +798,7 @@ void diff_main(void) if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode)) { for (j = 0; j < 2; j++) { memset(&dir[j], 0, sizeof(dir)); - dirtree_handle_callback(dirtree_start(files[j], 1), list_dir); + dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir); dir[j].nr_elm = TT.size; //size updated in list_dir qsort(&(dir[j].list[1]), (TT.size - 1), sizeof(char*), cmp); diff --git a/toys/pending/tar.c b/toys/pending/tar.c index c5043087..384199ea 100644 --- a/toys/pending/tar.c +++ b/toys/pending/tar.c @@ -788,7 +788,7 @@ void tar_main(void) for (tmp = TT.inc; tmp; tmp = tmp->next) { TT.handle = tar_hdl; //recurse thru dir and add files to archive - dirtree_handle_callback(dirtree_start(tmp->arg, toys.optflags & FLAG_h), + dirtree_flagread(tmp->arg, DIRTREE_SYMFOLLOW*!!(toys.optflags&FLAG_h), add_to_tar); } memset(toybuf, 0, 1024); diff --git a/toys/posix/chgrp.c b/toys/posix/chgrp.c index 6b95c6ad..e0690c93 100644 --- a/toys/posix/chgrp.c +++ b/toys/posix/chgrp.c @@ -94,8 +94,8 @@ void chgrp_main(void) TT.group = xgetgrnamid(TT.group_name)->gr_gid; for (s=toys.optargs+1; *s; s++) - dirtree_handle_callback(dirtree_start(*s, toys.optflags&(FLAG_H|FLAG_L)), - do_chgrp);; + dirtree_flagread(*s, DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), + do_chgrp); if (CFG_TOYBOX_FREE && ischown) free(own); } diff --git a/toys/posix/cp.c b/toys/posix/cp.c index cb7e6e3b..0e6a2efa 100644 --- a/toys/posix/cp.c +++ b/toys/posix/cp.c @@ -430,12 +430,11 @@ void cp_main(void) if (rc) rc = rename(src, TT.destname); } - // Skip nonexistent sources + // Copy if we didn't mv, skipping nonexistent sources if (rc) { - if (errno!=EXDEV || - !(new = dirtree_start(src, toys.optflags&(FLAG_H|FLAG_L)))) + if (errno!=EXDEV || dirtree_flagread(src, DIRTREE_SHUTUP+ + DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), TT.callback)) perror_msg("bad '%s'", src); - else dirtree_handle_callback(new, TT.callback); } if (destdir) free(TT.destname); } diff --git a/toys/posix/du.c b/toys/posix/du.c index 77c7b6e2..2797b3f4 100644 --- a/toys/posix/du.c +++ b/toys/posix/du.c @@ -153,7 +153,7 @@ void du_main(void) // Loop over command line arguments, recursing through children for (args = toys.optc ? toys.optargs : noargs; *args; args++) - dirtree_handle_callback(dirtree_start(*args, toys.optflags&(FLAG_H|FLAG_L)), + dirtree_flagread(*args, DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), do_du); if (toys.optflags & FLAG_c) print(TT.total*512, 0); diff --git a/toys/posix/find.c b/toys/posix/find.c index a9c35f41..303aada2 100644 --- a/toys/posix/find.c +++ b/toys/posix/find.c @@ -556,7 +556,7 @@ void find_main(void) // Loop through paths for (i = 0; i < len; i++) - dirtree_handle_callback(dirtree_start(ss[i], toys.optflags&(FLAG_H|FLAG_L)), + dirtree_flagread(ss[i], DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), do_find); execdir(0, 1); diff --git a/toys/posix/ls.c b/toys/posix/ls.c index 4dcbe888..799631b1 100644 --- a/toys/posix/ls.c +++ b/toys/posix/ls.c @@ -540,12 +540,15 @@ void ls_main(void) if (toys.optflags & FLAG_d) toys.optflags &= ~FLAG_R; // Iterate through command line arguments, collecting directories and files. - // Non-absolute paths are relative to current directory. - TT.files = dirtree_start(0, 0); + // Non-absolute paths are relative to current directory. Top of tree is + // a dummy node to collect command line arguments into pseudo-directory. + TT.files = dirtree_add_node(0, 0, 0); TT.files->dirfd = AT_FDCWD; for (s = *toys.optargs ? toys.optargs : noargs; *s; s++) { - dt = dirtree_start(*s, !(toys.optflags&(FLAG_l|FLAG_d|FLAG_F)) || - (toys.optflags&(FLAG_L|FLAG_H))); + int sym = !(toys.optflags&(FLAG_l|FLAG_d|FLAG_F)) + || (toys.optflags&(FLAG_L|FLAG_H)); + + dt = dirtree_add_node(0, *s, DIRTREE_SYMFOLLOW*sym); // note: double_list->prev temporarirly goes in dirtree->parent if (dt) dlist_add_nomalloc((void *)&TT.files->child, (void *)dt); diff --git a/www/code.html b/www/code.html index b1c6d3f9..7e15e181 100644 --- a/www/code.html +++ b/www/code.html @@ -1198,16 +1198,32 @@ of functions.</p> <p>These functions do not call chdir() or rely on PATH_MAX. Instead they use openat() and friends, using one filehandle per directory level to -recurseinto subdirectories. (I.E. they can descend 1000 directories deep +recurse into subdirectories. (I.E. they can descend 1000 directories deep if setrlimit(RLIMIT_NOFILE) allows enough open filehandles, and the default in /proc/self/limits is generally 1024.)</p> +<p>There are two main ways to use dirtree: 1) assemble a tree of nodes +representing a snapshot of directory state and traverse them using the +->next and ->child pointers, or 2) traverse the tree calling a callback +function on each entry, and freeing its node afterwards. (You can also +combine the two, using the callback as a filter to determine which nodes +to keep.)</p> + <p>The basic dirtree functions are:</p> <ul> -<li><p><b>dirtree_read(char *path, int (*callback)(struct dirtree node))</b> - -recursively read directories, either applying callback() or returning -a tree of struct dirtree if callback is NULL.</p></li> +<li><p><b>struct dirtree *dirtree_read(char *path, int (*callback)(struct +dirtree node))</b> - recursively read files and directories, calling +callback() on each, and returning a tree of saved nodes (if any). +If path doesn't exist, returns DIRTREE_ABORTVAL. If callback is NULL, +returns a single node at that path.</p> + +<li><p><b>dirtree_notdotdot(struct dirtree *new)</b> - standard callback +which discards "." and ".." entries and returns DIRTREE_SAVE|DIRTREE_RECURSE +for everything else. Used directly, this assembles a snapshot tree of +the contents of this directory and its subdirectories +to be processed after dirtree_read() returns (by traversing the +struct dirtree's ->next and ->child pointers from the returned root node).</p> <li><p><b>dirtree_path(struct dirtree *node, int *plen)</b> - malloc() a string containing the path from the root of this tree to this node. If @@ -1215,21 +1231,21 @@ plen isn't NULL then *plen is how many extra bytes to malloc at the end of string.</p></li> <li><p><b>dirtree_parentfd(struct dirtree *node)</b> - return fd of -containing directory, for use with openat() and such.</p></li> +directory containing this node, for use with openat() and such.</p></li> </ul> -<p>The <b>dirtree_read()</b> function takes two arguments, a starting path for -the root of the tree, and a callback function. The callback takes a -<b>struct dirtree *</b> (from lib/lib.h) as its argument. If the callback is -NULL, the traversal uses a default callback (dirtree_notdotdot()) which -recursively assembles a tree of struct dirtree nodes for all files under -this directory and subdirectories (filtering out "." and ".." entries), -after which dirtree_read() returns the pointer to the root node of this -snapshot tree.</p> +<p>The <b>dirtree_read()</b> function is the standard way to start +directory traversal. It takes two arguments: a starting path for +the root of the tree, and a callback function. The callback() is called +on each directory entry, its argument is a fully populated +<b>struct dirtree *</b> (from lib/lib.h) describing the node, and its +return value tells the dirtree infrastructure what to do next.</p> -<p>Otherwise the callback() is called on each entry in the directory, -with struct dirtree * as its argument. This includes the initial -node created by dirtree_read() at the top of the tree.</p> +<p>(There's also a three argument version, +<b>dirtree_flagread(char *path, int flags, int (*callback)(struct +dirtree node))</b>, which lets you apply flags like DIRTREE_SYMFOLLOW and +DIRTREE_SHUTUP to reading the top node, but this only affects the top node. +Child nodes use the flags returned by callback().</p> <p><b>struct dirtree</b></p> @@ -1237,12 +1253,13 @@ node created by dirtree_read() at the top of the tree.</p> st</b> entries describing a file, plus a <b>char *symlink</b> which is NULL for non-symlinks.</p> -<p>During a callback function, the <b>int data</b> field of directory nodes -contains a dirfd (for use with the openat() family of functions). This is -generally used by calling dirtree_parentfd() on the callback's node argument. -For symlinks, data contains the length of the symlink string. On the second -callback from DIRTREE_COMEAGAIN (depth-first traversal) data = -1 for -all nodes (that's how you can tell it's the second callback).</p> +<p>During a callback function, the <b>int dirfd</b> field of directory nodes +contains a directory file descriptor (for use with the openat() family of +functions). This isn't usually used directly, intstead call dirtree_parentfd() +on the callback's node argument. The <b>char again</a> field is 0 for the +first callback on a node, and 1 on the second callback (triggered by returning +DIRTREE_COMEAGAIN on a directory, made after all children have been processed). +</p> <p>Users of this code may put anything they like into the <b>long extra</b> field. For example, "cp" and "mv" use this to store a dirfd for the destination @@ -1266,15 +1283,17 @@ return DIRTREE_ABORT from parent callbacks too.)</p></li> <li><p><b>DIRTREE_RECURSE</b> - Examine directory contents. Ignored for non-directory entries. The remaining flags only take effect when recursing into the children of a directory.</p></li> -<li><p><b>DIRTREE_COMEAGAIN</b> - Call the callback a second time after -examining all directory contents, allowing depth-first traversal. -On the second call, dirtree->data = -1.</p></li> +<li><p><b>DIRTREE_COMEAGAIN</b> - Call the callback on this node a second time +after examining all directory contents, allowing depth-first traversal. +On the second call, dirtree->again is nonzero.</p></li> <li><p><b>DIRTREE_SYMFOLLOW</b> - follow symlinks when populating children's <b>struct stat st</b> (by feeding a nonzero value to the symfollow argument of dirtree_add_node()), which means DIRTREE_RECURSE treats symlinks to directories as directories. (Avoiding infinite recursion is the callback's problem: the non-NULL dirtree->symlink can still distinguish between -them.)</p></li> +them. The "find" command follows ->parent up the tree to the root node +each time, checking to make sure that stat's dev and inode pair don't +match any ancestors.)</p></li> </ul> <p>Each struct dirtree contains three pointers (next, parent, and child) @@ -1299,15 +1318,15 @@ single malloc() (even char *symlink points to memory at the end of the node), so llist_free() works but its callback must descend into child nodes (freeing a tree, not just a linked list), plus whatever the user stored in extra.</p> -<p>The <b>dirtree_read</b>() function is a simple wrapper, calling <b>dirtree_add_node</b>() +<p>The <b>dirtree_flagread</b>() function is a simple wrapper, calling <b>dirtree_add_node</b>() to create a root node relative to the current directory, then calling -<b>handle_callback</b>() on that node (which recurses as instructed by the callback -return flags). Some commands (such as chgrp) bypass this wrapper, for example -to control whether or not to follow symlinks to the root node; symlinks +<b>dirtree_handle_callback</b>() on that node (which recurses as instructed by the callback +return flags). The flags argument primarily lets you +control whether or not to follow symlinks to the root node; symlinks listed on the command line are often treated differently than symlinks -encountered during recursive directory traversal). +encountered during recursive directory traversal. -<p>The ls command not only bypasses the wrapper, but never returns +<p>The ls command not only bypasses this wrapper, but never returns <b>DIRTREE_RECURSE</b> from the callback, instead calling <b>dirtree_recurse</b>() manually from elsewhere in the program. This gives ls -lR manual control of traversal order, which is neither depth first nor breadth first but |