--- a/src/utils/fstreewalk.cpp
+++ b/src/utils/fstreewalk.cpp
@@ -38,11 +38,18 @@
using namespace std;
#endif /* NO_NAMESPACES */
+const int FsTreeWalker::FtwTravMask = FtwTravNatural|
+ FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth;
+
class FsTreeWalker::Internal {
int options;
+ int depthswitch;
stringstream reason;
list<string> skippedNames;
list<string> skippedPaths;
+ // When doing Breadth or FilesThenDirs traversal, we keep a list
+ // of directory paths to be processed, and we do not recurse.
+ list<string> dirs;
int errors;
void logsyserr(const char *call, const string ¶m)
{
@@ -58,6 +65,7 @@
data = new Internal;
if (data) {
data->options = opts;
+ data->depthswitch = 4;
data->errors = 0;
}
}
@@ -67,10 +75,11 @@
delete data;
}
-void FsTreeWalker::setOpts(Options opts)
+void FsTreeWalker::setOpts(Options opts, int depthswitch)
{
if (data) {
data->options = opts;
+ data->depthswitch = depthswitch;
}
}
@@ -152,20 +161,109 @@
return false;
}
+static inline int slashcount(const string& p)
+{
+ int n = 0;
+ for (unsigned int i = 0; i < p.size(); i++)
+ if (p[i] == '/')
+ n++;
+ return n;
+}
+
FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
FsTreeWalkerCB& cb)
{
string top = (data->options & FtwNoCanon) ? _top : path_canon(_top);
+ if ((data->options & FtwTravMask) == 0) {
+ data->options |= FtwTravNatural;
+ }
+
+ int basedepth = slashcount(top); // Only used for breadthThenDepth
+
struct stat st;
- int statret;
// We always follow symlinks at this point. Makes more sense.
- statret = stat(top.c_str(), &st);
- if (statret == -1) {
+ if (stat(top.c_str(), &st) == -1) {
data->logsyserr("stat", top);
return FtwError;
}
- return iwalk(top, &st, cb);
+
+ // Recursive version, using the call stack to store state. iwalk
+ // will process files and recursively descend into subdirs in
+ // physical order of the current directory.
+ if ((data->options & FtwTravMask) == FtwTravNatural) {
+ return iwalk(top, &st, cb);
+ }
+
+ // Breadth first of filesThenDirs semi-depth first order
+ // Managing lists of directories to be visited later, in breadth or
+ // depth order. Null marker are inserted in the list to indicate
+ // father directory changes (avoids computing parents all the time).
+ data->dirs.push_back(top);
+ Status status;
+ while (!data->dirs.empty()) {
+ string dir, nfather;
+ if (data->options & (FtwTravBreadth|FtwTravBreadthThenDepth)) {
+ // Breadth first, pop and process an older dir at the
+ // front of the list. This will add any child dirs at the
+ // back
+ dir = data->dirs.front();
+ data->dirs.pop_front();
+ if (dir == "") {
+ // Father change marker.
+ if (data->dirs.empty())
+ break;
+ dir = data->dirs.front();
+ data->dirs.pop_front();
+ nfather = path_getfather(dir);
+ if (data->options & FtwTravBreadthThenDepth) {
+ // Check if new depth warrants switch to depth first
+ // traversal (will happen on next loop iteration).
+ int curdepth = slashcount(dir) - basedepth;
+ if (curdepth >= data->depthswitch) {
+ //fprintf(stderr, "SWITCHING TO DEPTH FIRST\n");
+ data->options &= ~FtwTravMask;
+ data->options |= FtwTravFilesThenDirs;
+ }
+ }
+ }
+ } else {
+ // Depth first, pop and process latest dir
+ dir = data->dirs.back();
+ data->dirs.pop_back();
+ if (dir == "") {
+ // Father change marker.
+ if (data->dirs.empty())
+ break;
+ dir = data->dirs.back();
+ data->dirs.pop_back();
+ nfather = path_getfather(dir);
+ }
+ }
+
+ // If changing parent directory, advise our user.
+ if (!nfather.empty()) {
+ if (stat(nfather.c_str(), &st) == -1) {
+ data->logsyserr("stat", nfather);
+ return FtwError;
+ }
+ if ((status = cb.processone(nfather, &st, FtwDirReturn)) &
+ (FtwStop|FtwError)) {
+ return status;
+ }
+ }
+
+ if (stat(dir.c_str(), &st) == -1) {
+ data->logsyserr("stat", dir);
+ return FtwError;
+ }
+ // iwalk will not recurse in this case, just process file entries
+ // and append subdir entries to the list.
+ status = iwalk(dir, &st, cb);
+ if (status != FtwOk)
+ return status;
+ }
+ return FtwOk;
}
// Note that the 'norecurse' flag is handled as part of the directory read.
@@ -176,8 +274,9 @@
FsTreeWalkerCB& cb)
{
Status status = FtwOk;
-
- // Handle the parameter
+ bool nullpush = false;
+
+ // Tell user to process the top entry itself
if (S_ISDIR(stp->st_mode)) {
if ((status = cb.processone(top, stp, FtwDirEnter)) &
(FtwStop|FtwError)) {
@@ -205,6 +304,8 @@
struct dirent *ent;
while ((ent = readdir(d)) != 0) {
+ string fn;
+ struct stat st;
// Skip . and ..
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
continue;
@@ -212,55 +313,60 @@
// Skipped file names match ?
if (!data->skippedNames.empty()) {
if (inSkippedNames(ent->d_name))
- goto skip;
+ continue;
}
- {
- string fn = path_cat(top, ent->d_name);
-
- struct stat st;
- int statret = (data->options & FtwFollow) ? stat(fn.c_str(), &st) :
- lstat(fn.c_str(), &st);
- if (statret == -1) {
- data->logsyserr("stat", fn);
- continue;
- }
- if (!data->skippedPaths.empty()) {
- // We do not check the ancestors. This means that you can have
- // a topdirs member under a skippedPath, to index a portion of
- // an ignored area. This is the way it had always worked, but
- // this was broken by 1.13.00 and the systematic use of
- // FNM_LEADING_DIR
- if (inSkippedPaths(fn, false))
- goto skip;
- }
-
- if (S_ISDIR(st.st_mode)) {
- if (data->options & FtwNoRecurse) {
- status = cb.processone(fn, &st, FtwDirEnter);
- } else {
- status = iwalk(fn, &st, cb);
- }
- if (status & (FtwStop|FtwError))
- goto out;
-
- // No DirReturn call when not recursing
- if (!(data->options & FtwNoRecurse))
- if ((status = cb.processone(top, &st, FtwDirReturn))
- & (FtwStop|FtwError))
- goto out;
- } else if (S_ISREG(st.st_mode)) {
- if ((status = cb.processone(fn, &st, FtwRegular)) &
- (FtwStop|FtwError)) {
- goto out;
- }
- }
- }
-
- skip: ;
-
- // We skip other file types (devices etc...)
- }
+ fn = path_cat(top, ent->d_name);
+ int statret = (data->options & FtwFollow) ? stat(fn.c_str(), &st) :
+ lstat(fn.c_str(), &st);
+ if (statret == -1) {
+ data->logsyserr("stat", fn);
+ continue;
+ }
+ if (!data->skippedPaths.empty()) {
+ // We do not check the ancestors. This means that you can have
+ // a topdirs member under a skippedPath, to index a portion of
+ // an ignored area. This is the way it had always worked, but
+ // this was broken by 1.13.00 and the systematic use of
+ // FNM_LEADING_DIR
+ if (inSkippedPaths(fn, false))
+ continue;
+ }
+
+ if (S_ISDIR(st.st_mode)) {
+ if (data->options & FtwNoRecurse) {
+ status = cb.processone(fn, &st, FtwDirEnter);
+ } else {
+ if (data->options & FtwTravNatural) {
+ status = iwalk(fn, &st, cb);
+ } else {
+ // If first subdir, push marker to separate
+ // from entries for other dir. This is to help
+ // with generating DirReturn callbacks
+ if (!nullpush) {
+ if (!data->dirs.empty() && data->dirs.back() != "")
+ data->dirs.push_back("");
+ nullpush = true;
+ }
+ data->dirs.push_back(fn);
+ continue;
+ }
+ }
+ // Note: only recursive case gets here.
+ if (status & (FtwStop|FtwError))
+ goto out;
+ if (!(data->options & FtwNoRecurse))
+ if ((status = cb.processone(top, &st, FtwDirReturn))
+ & (FtwStop|FtwError))
+ goto out;
+ } else if (S_ISREG(st.st_mode)) {
+ if ((status = cb.processone(fn, &st, FtwRegular)) &
+ (FtwStop|FtwError)) {
+ goto out;
+ }
+ }
+ // We ignore other file types (devices etc...)
+ } // readdir loop
out:
if (d)
@@ -283,6 +389,9 @@
#define OPT_P 0x4
#define OPT_r 0x8
#define OPT_c 0x10
+#define OPT_b 0x20
+#define OPT_d 0x40
+#define OPT_m 0x80
class myCB : public FsTreeWalkerCB {
public:
@@ -306,10 +415,25 @@
static const char *thisprog;
+// Note that breadth first sorting is relatively expensive: less inode
+// locality, more disk usage (and also more user memory usage, does not appear
+// here):
+// y$ time trfstreewalk -d / 2>&1 > /tmp/alld
+// real 15m32.520s user 0m4.815s sys 0m33.014s
+// y$ time trfstreewalk -b / 2>&1 > /tmp/allb
+// real 17m3.380s user 0m4.658s sys 0m34.473s
+// y$ wc /tmp/allb /tmp/alld
+// 2618155 3208127 221619857 /tmp/allb
+// 2618154 3208126 221619847 /tmp/alld
+// (diff due to changes in /proc)
+
static char usage [] =
"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] topdir\n"
" -r : norecurse\n"
" -c : no path canonification\n"
+" -b : use breadth first walk\n"
+" -d : use almost depth first (dir files, then subdirs)\n"
+" -m : use breadth up to 4 deep then switch to -d\n"
;
static void
Usage(void)
@@ -332,8 +456,11 @@
Usage();
while (**argv)
switch (*(*argv)++) {
+ case 'b': op_flags |= OPT_b; break;
+ case 'c': op_flags |= OPT_c; break;
+ case 'd': op_flags |= OPT_d; break;
+ case 'm': op_flags |= OPT_m; break;
case 'r': op_flags |= OPT_r; break;
- case 'c': op_flags |= OPT_c; break;
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
patterns.push_back(*(++argv));
argc--;
@@ -356,6 +483,14 @@
opt |= FsTreeWalker::FtwNoRecurse;
if (op_flags & OPT_c)
opt |= FsTreeWalker::FtwNoCanon;
+
+ if (op_flags & OPT_b)
+ opt |= FsTreeWalker::FtwTravBreadth;
+ else if (op_flags & OPT_d)
+ opt |= FsTreeWalker::FtwTravFilesThenDirs;
+ else if (op_flags & OPT_m)
+ opt |= FsTreeWalker::FtwTravBreadthThenDepth;
+
FsTreeWalker walker(opt);
walker.setSkippedNames(patterns);
walker.setSkippedPaths(paths);