From 8d95074b7d034188af8542aaea0306d3670d71be Mon Sep 17 00:00:00 2001
From: Rob Landley
Date: Mon, 7 Mar 2016 16:02:47 -0600
Subject: Cleanup pass on the dirtree infrastructure, in preparation for making
rm -r handle infinite depth. Fix docs, tweak dirtree_handle_callback()
semantics, remove dirtree_start() and don't export dirtree_handle_callback(),
instead offer dirtree_flagread(). (dirtree_read() is a wrapper around
dirtree_flagread passing 0 for flags.)
---
lib/dirtree.c | 39 ++++++++++++-------------
lib/lib.h | 7 +++--
toys/pending/diff.c | 2 +-
toys/pending/tar.c | 2 +-
toys/posix/chgrp.c | 4 +--
toys/posix/cp.c | 7 ++---
toys/posix/du.c | 2 +-
toys/posix/find.c | 2 +-
toys/posix/ls.c | 11 ++++---
www/code.html | 83 ++++++++++++++++++++++++++++++++---------------------
10 files changed, 90 insertions(+), 69 deletions(-)
diff --git a/lib/dirtree.c b/lib/dirtree.c
index 8b9f2993..cc1ab90c 100644
--- a/lib/dirtree.c
+++ b/lib/dirtree.c
@@ -17,7 +17,7 @@ static int notdotdot(char *name)
int dirtree_notdotdot(struct dirtree *catch)
{
// Should we skip "." and ".."?
- return notdotdot(catch->name) ? DIRTREE_SAVE|DIRTREE_RECURSE : 0;
+ return notdotdot(catch->name)*(DIRTREE_SAVE|DIRTREE_RECURSE);
}
// Create a dirtree node from a path, with stat and symlink info.
@@ -96,21 +96,18 @@ int dirtree_parentfd(struct dirtree *node)
return node->parent ? node->parent->dirfd : AT_FDCWD;
}
-// Handle callback for a node in the tree. Returns saved node(s) or NULL.
-//
-// By default, allocates a tree of struct dirtree, not following symlinks
-// If callback==NULL, or callback always returns 0, allocate tree of struct
-// dirtree and return root of tree. Otherwise call callback(node) on each
-// hit, free structures after use, and return NULL.
-//
+// Handle callback for a node in the tree. Returns saved node(s) if
+// callback returns DIRTREE_SAVE, otherwise frees consumed nodes and
+// returns NULL. If !callback return top node unchanged.
+// If !new return DIRTREE_ABORTVAL
struct dirtree *dirtree_handle_callback(struct dirtree *new,
int (*callback)(struct dirtree *node))
{
int flags;
- if (!new) return 0;
- if (!callback) callback = dirtree_notdotdot;
+ if (!new) return DIRTREE_ABORTVAL;
+ if (!callback) return new;
flags = callback(new);
if (S_ISDIR(new->st.st_mode)) {
@@ -176,19 +173,21 @@ int dirtree_recurse(struct dirtree *node,
return flags;
}
-// Create dirtree root
-struct dirtree *dirtree_start(char *name, int symfollow)
+// Create dirtree from path, using callback to filter nodes. If !callback
+// return just the top node. Use dirtree_notdotdot callback to allocate a
+// tree of struct dirtree nodes and return pointer to root node for later
+// processing.
+// Returns DIRTREE_ABORTVAL if path didn't exist (use DIRTREE_SHUTUP to handle
+// error message yourself).
+
+struct dirtree *dirtree_flagread(char *path, int flags,
+ int (*callback)(struct dirtree *node))
{
- return dirtree_add_node(0, name, DIRTREE_SYMFOLLOW*!!symfollow);
+ return dirtree_handle_callback(dirtree_add_node(0, path, flags), callback);
}
-// Create dirtree from path, using callback to filter nodes.
-// If callback == NULL allocate a tree of struct dirtree nodes and return
-// pointer to root node.
-
+// Common case
struct dirtree *dirtree_read(char *path, int (*callback)(struct dirtree *node))
{
- struct dirtree *root = dirtree_start(path, 0);
-
- return root ? dirtree_handle_callback(root, callback) : DIRTREE_ABORTVAL;
+ return dirtree_flagread(path, 0, callback);
}
diff --git a/lib/lib.h b/lib/lib.h
index c8cff6ef..917be0c1 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -62,6 +62,8 @@ void get_optflags(void);
#define DIRTREE_SYMFOLLOW 8
// Don't warn about failure to stat
#define DIRTREE_SHUTUP 16
+// Breadth first traversal, conserves filehandles at the expense of memory
+#define DIRTREE_BREADTH 32
// Don't look at any more files in this directory.
#define DIRTREE_ABORT 256
@@ -77,15 +79,14 @@ struct dirtree {
char name[];
};
-struct dirtree *dirtree_start(char *name, int symfollow);
struct dirtree *dirtree_add_node(struct dirtree *p, char *name, int flags);
char *dirtree_path(struct dirtree *node, int *plen);
int dirtree_notdotdot(struct dirtree *catch);
int dirtree_parentfd(struct dirtree *node);
-struct dirtree *dirtree_handle_callback(struct dirtree *new,
- int (*callback)(struct dirtree *node));
int dirtree_recurse(struct dirtree *node, int (*callback)(struct dirtree *node),
int symfollow);
+struct dirtree *dirtree_flagread(char *path, int flags,
+ int (*callback)(struct dirtree *node));
struct dirtree *dirtree_read(char *path, int (*callback)(struct dirtree *node));
// help.c
diff --git a/toys/pending/diff.c b/toys/pending/diff.c
index 80238614..da6c13a0 100644
--- a/toys/pending/diff.c
+++ b/toys/pending/diff.c
@@ -798,7 +798,7 @@ void diff_main(void)
if (S_ISDIR(st[0].st_mode) && S_ISDIR(st[1].st_mode)) {
for (j = 0; j < 2; j++) {
memset(&dir[j], 0, sizeof(dir));
- dirtree_handle_callback(dirtree_start(files[j], 1), list_dir);
+ dirtree_flagread(files[j], DIRTREE_SYMFOLLOW, list_dir);
dir[j].nr_elm = TT.size; //size updated in list_dir
qsort(&(dir[j].list[1]), (TT.size - 1), sizeof(char*), cmp);
diff --git a/toys/pending/tar.c b/toys/pending/tar.c
index c5043087..384199ea 100644
--- a/toys/pending/tar.c
+++ b/toys/pending/tar.c
@@ -788,7 +788,7 @@ void tar_main(void)
for (tmp = TT.inc; tmp; tmp = tmp->next) {
TT.handle = tar_hdl;
//recurse thru dir and add files to archive
- dirtree_handle_callback(dirtree_start(tmp->arg, toys.optflags & FLAG_h),
+ dirtree_flagread(tmp->arg, DIRTREE_SYMFOLLOW*!!(toys.optflags&FLAG_h),
add_to_tar);
}
memset(toybuf, 0, 1024);
diff --git a/toys/posix/chgrp.c b/toys/posix/chgrp.c
index 6b95c6ad..e0690c93 100644
--- a/toys/posix/chgrp.c
+++ b/toys/posix/chgrp.c
@@ -94,8 +94,8 @@ void chgrp_main(void)
TT.group = xgetgrnamid(TT.group_name)->gr_gid;
for (s=toys.optargs+1; *s; s++)
- dirtree_handle_callback(dirtree_start(*s, toys.optflags&(FLAG_H|FLAG_L)),
- do_chgrp);;
+ dirtree_flagread(*s, DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)),
+ do_chgrp);
if (CFG_TOYBOX_FREE && ischown) free(own);
}
diff --git a/toys/posix/cp.c b/toys/posix/cp.c
index cb7e6e3b..0e6a2efa 100644
--- a/toys/posix/cp.c
+++ b/toys/posix/cp.c
@@ -430,12 +430,11 @@ void cp_main(void)
if (rc) rc = rename(src, TT.destname);
}
- // Skip nonexistent sources
+ // Copy if we didn't mv, skipping nonexistent sources
if (rc) {
- if (errno!=EXDEV ||
- !(new = dirtree_start(src, toys.optflags&(FLAG_H|FLAG_L))))
+ if (errno!=EXDEV || dirtree_flagread(src, DIRTREE_SHUTUP+
+ DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)), TT.callback))
perror_msg("bad '%s'", src);
- else dirtree_handle_callback(new, TT.callback);
}
if (destdir) free(TT.destname);
}
diff --git a/toys/posix/du.c b/toys/posix/du.c
index 77c7b6e2..2797b3f4 100644
--- a/toys/posix/du.c
+++ b/toys/posix/du.c
@@ -153,7 +153,7 @@ void du_main(void)
// Loop over command line arguments, recursing through children
for (args = toys.optc ? toys.optargs : noargs; *args; args++)
- dirtree_handle_callback(dirtree_start(*args, toys.optflags&(FLAG_H|FLAG_L)),
+ dirtree_flagread(*args, DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)),
do_du);
if (toys.optflags & FLAG_c) print(TT.total*512, 0);
diff --git a/toys/posix/find.c b/toys/posix/find.c
index a9c35f41..303aada2 100644
--- a/toys/posix/find.c
+++ b/toys/posix/find.c
@@ -556,7 +556,7 @@ void find_main(void)
// Loop through paths
for (i = 0; i < len; i++)
- dirtree_handle_callback(dirtree_start(ss[i], toys.optflags&(FLAG_H|FLAG_L)),
+ dirtree_flagread(ss[i], DIRTREE_SYMFOLLOW*!!(toys.optflags&(FLAG_H|FLAG_L)),
do_find);
execdir(0, 1);
diff --git a/toys/posix/ls.c b/toys/posix/ls.c
index 4dcbe888..799631b1 100644
--- a/toys/posix/ls.c
+++ b/toys/posix/ls.c
@@ -540,12 +540,15 @@ void ls_main(void)
if (toys.optflags & FLAG_d) toys.optflags &= ~FLAG_R;
// Iterate through command line arguments, collecting directories and files.
- // Non-absolute paths are relative to current directory.
- TT.files = dirtree_start(0, 0);
+ // Non-absolute paths are relative to current directory. Top of tree is
+ // a dummy node to collect command line arguments into pseudo-directory.
+ TT.files = dirtree_add_node(0, 0, 0);
TT.files->dirfd = AT_FDCWD;
for (s = *toys.optargs ? toys.optargs : noargs; *s; s++) {
- dt = dirtree_start(*s, !(toys.optflags&(FLAG_l|FLAG_d|FLAG_F)) ||
- (toys.optflags&(FLAG_L|FLAG_H)));
+ int sym = !(toys.optflags&(FLAG_l|FLAG_d|FLAG_F))
+ || (toys.optflags&(FLAG_L|FLAG_H));
+
+ dt = dirtree_add_node(0, *s, DIRTREE_SYMFOLLOW*sym);
// note: double_list->prev temporarirly goes in dirtree->parent
if (dt) dlist_add_nomalloc((void *)&TT.files->child, (void *)dt);
diff --git a/www/code.html b/www/code.html
index b1c6d3f9..7e15e181 100644
--- a/www/code.html
+++ b/www/code.html
@@ -1198,16 +1198,32 @@ of functions.
These functions do not call chdir() or rely on PATH_MAX. Instead they
use openat() and friends, using one filehandle per directory level to
-recurseinto subdirectories. (I.E. they can descend 1000 directories deep
+recurse into subdirectories. (I.E. they can descend 1000 directories deep
if setrlimit(RLIMIT_NOFILE) allows enough open filehandles, and the default
in /proc/self/limits is generally 1024.)
+There are two main ways to use dirtree: 1) assemble a tree of nodes
+representing a snapshot of directory state and traverse them using the
+->next and ->child pointers, or 2) traverse the tree calling a callback
+function on each entry, and freeing its node afterwards. (You can also
+combine the two, using the callback as a filter to determine which nodes
+to keep.)
+
The basic dirtree functions are:
-dirtree_read(char *path, int (*callback)(struct dirtree node)) -
-recursively read directories, either applying callback() or returning
-a tree of struct dirtree if callback is NULL.
+struct dirtree *dirtree_read(char *path, int (*callback)(struct
+dirtree node)) - recursively read files and directories, calling
+callback() on each, and returning a tree of saved nodes (if any).
+If path doesn't exist, returns DIRTREE_ABORTVAL. If callback is NULL,
+returns a single node at that path.
+
+dirtree_notdotdot(struct dirtree *new) - standard callback
+which discards "." and ".." entries and returns DIRTREE_SAVE|DIRTREE_RECURSE
+for everything else. Used directly, this assembles a snapshot tree of
+the contents of this directory and its subdirectories
+to be processed after dirtree_read() returns (by traversing the
+struct dirtree's ->next and ->child pointers from the returned root node).
dirtree_path(struct dirtree *node, int *plen) - malloc() a
string containing the path from the root of this tree to this node. If
@@ -1215,21 +1231,21 @@ plen isn't NULL then *plen is how many extra bytes to malloc at the end
of string.
dirtree_parentfd(struct dirtree *node) - return fd of
-containing directory, for use with openat() and such.
+directory containing this node, for use with openat() and such.
-The dirtree_read() function takes two arguments, a starting path for
-the root of the tree, and a callback function. The callback takes a
-struct dirtree * (from lib/lib.h) as its argument. If the callback is
-NULL, the traversal uses a default callback (dirtree_notdotdot()) which
-recursively assembles a tree of struct dirtree nodes for all files under
-this directory and subdirectories (filtering out "." and ".." entries),
-after which dirtree_read() returns the pointer to the root node of this
-snapshot tree.
+The dirtree_read() function is the standard way to start
+directory traversal. It takes two arguments: a starting path for
+the root of the tree, and a callback function. The callback() is called
+on each directory entry, its argument is a fully populated
+struct dirtree * (from lib/lib.h) describing the node, and its
+return value tells the dirtree infrastructure what to do next.
-Otherwise the callback() is called on each entry in the directory,
-with struct dirtree * as its argument. This includes the initial
-node created by dirtree_read() at the top of the tree.
+(There's also a three argument version,
+dirtree_flagread(char *path, int flags, int (*callback)(struct
+dirtree node)), which lets you apply flags like DIRTREE_SYMFOLLOW and
+DIRTREE_SHUTUP to reading the top node, but this only affects the top node.
+Child nodes use the flags returned by callback().
struct dirtree
@@ -1237,12 +1253,13 @@ node created by dirtree_read() at the top of the tree.
st entries describing a file, plus a char *symlink
which is NULL for non-symlinks.
-During a callback function, the int data field of directory nodes
-contains a dirfd (for use with the openat() family of functions). This is
-generally used by calling dirtree_parentfd() on the callback's node argument.
-For symlinks, data contains the length of the symlink string. On the second
-callback from DIRTREE_COMEAGAIN (depth-first traversal) data = -1 for
-all nodes (that's how you can tell it's the second callback).
+During a callback function, the int dirfd field of directory nodes
+contains a directory file descriptor (for use with the openat() family of
+functions). This isn't usually used directly, intstead call dirtree_parentfd()
+on the callback's node argument. The char again field is 0 for the
+first callback on a node, and 1 on the second callback (triggered by returning
+DIRTREE_COMEAGAIN on a directory, made after all children have been processed).
+
Users of this code may put anything they like into the long extra
field. For example, "cp" and "mv" use this to store a dirfd for the destination
@@ -1266,15 +1283,17 @@ return DIRTREE_ABORT from parent callbacks too.)
DIRTREE_RECURSE - Examine directory contents. Ignored for
non-directory entries. The remaining flags only take effect when
recursing into the children of a directory.
-DIRTREE_COMEAGAIN - Call the callback a second time after
-examining all directory contents, allowing depth-first traversal.
-On the second call, dirtree->data = -1.
+DIRTREE_COMEAGAIN - Call the callback on this node a second time
+after examining all directory contents, allowing depth-first traversal.
+On the second call, dirtree->again is nonzero.
DIRTREE_SYMFOLLOW - follow symlinks when populating children's
struct stat st (by feeding a nonzero value to the symfollow argument of
dirtree_add_node()), which means DIRTREE_RECURSE treats symlinks to
directories as directories. (Avoiding infinite recursion is the callback's
problem: the non-NULL dirtree->symlink can still distinguish between
-them.)
+them. The "find" command follows ->parent up the tree to the root node
+each time, checking to make sure that stat's dev and inode pair don't
+match any ancestors.)
Each struct dirtree contains three pointers (next, parent, and child)
@@ -1299,15 +1318,15 @@ single malloc() (even char *symlink points to memory at the end of the node),
so llist_free() works but its callback must descend into child nodes (freeing
a tree, not just a linked list), plus whatever the user stored in extra.
-The dirtree_read() function is a simple wrapper, calling dirtree_add_node()
+
The dirtree_flagread() function is a simple wrapper, calling dirtree_add_node()
to create a root node relative to the current directory, then calling
-handle_callback() on that node (which recurses as instructed by the callback
-return flags). Some commands (such as chgrp) bypass this wrapper, for example
-to control whether or not to follow symlinks to the root node; symlinks
+dirtree_handle_callback() on that node (which recurses as instructed by the callback
+return flags). The flags argument primarily lets you
+control whether or not to follow symlinks to the root node; symlinks
listed on the command line are often treated differently than symlinks
-encountered during recursive directory traversal).
+encountered during recursive directory traversal.
-
The ls command not only bypasses the wrapper, but never returns
+
The ls command not only bypasses this wrapper, but never returns
DIRTREE_RECURSE from the callback, instead calling dirtree_recurse() manually
from elsewhere in the program. This gives ls -lR manual control
of traversal order, which is neither depth first nor breadth first but
--
cgit v1.2.3