|
a/src/utils/fstreewalk.cpp |
|
b/src/utils/fstreewalk.cpp |
|
... |
|
... |
36 |
|
36 |
|
37 |
#ifndef NO_NAMESPACES
|
37 |
#ifndef NO_NAMESPACES
|
38 |
using namespace std;
|
38 |
using namespace std;
|
39 |
#endif /* NO_NAMESPACES */
|
39 |
#endif /* NO_NAMESPACES */
|
40 |
|
40 |
|
|
|
41 |
const int FsTreeWalker::FtwTravMask = FtwTravNatural|
|
|
|
42 |
FtwTravBreadth|FtwTravFilesThenDirs|FtwTravBreadthThenDepth;
|
|
|
43 |
|
41 |
class FsTreeWalker::Internal {
|
44 |
class FsTreeWalker::Internal {
|
42 |
int options;
|
45 |
int options;
|
|
|
46 |
int depthswitch;
|
43 |
stringstream reason;
|
47 |
stringstream reason;
|
44 |
list<string> skippedNames;
|
48 |
list<string> skippedNames;
|
45 |
list<string> skippedPaths;
|
49 |
list<string> skippedPaths;
|
|
|
50 |
// When doing Breadth or FilesThenDirs traversal, we keep a list
|
|
|
51 |
// of directory paths to be processed, and we do not recurse.
|
|
|
52 |
list<string> dirs;
|
46 |
int errors;
|
53 |
int errors;
|
47 |
void logsyserr(const char *call, const string ¶m)
|
54 |
void logsyserr(const char *call, const string ¶m)
|
48 |
{
|
55 |
{
|
49 |
errors++;
|
56 |
errors++;
|
50 |
reason << call << "(" << param << ") : " << errno << " : " <<
|
57 |
reason << call << "(" << param << ") : " << errno << " : " <<
|
|
... |
|
... |
56 |
FsTreeWalker::FsTreeWalker(int opts)
|
63 |
FsTreeWalker::FsTreeWalker(int opts)
|
57 |
{
|
64 |
{
|
58 |
data = new Internal;
|
65 |
data = new Internal;
|
59 |
if (data) {
|
66 |
if (data) {
|
60 |
data->options = opts;
|
67 |
data->options = opts;
|
|
|
68 |
data->depthswitch = 4;
|
61 |
data->errors = 0;
|
69 |
data->errors = 0;
|
62 |
}
|
70 |
}
|
63 |
}
|
71 |
}
|
64 |
|
72 |
|
65 |
FsTreeWalker::~FsTreeWalker()
|
73 |
FsTreeWalker::~FsTreeWalker()
|
66 |
{
|
74 |
{
|
67 |
delete data;
|
75 |
delete data;
|
68 |
}
|
76 |
}
|
69 |
|
77 |
|
70 |
void FsTreeWalker::setOpts(Options opts)
|
78 |
void FsTreeWalker::setOpts(Options opts, int depthswitch)
|
71 |
{
|
79 |
{
|
72 |
if (data) {
|
80 |
if (data) {
|
73 |
data->options = opts;
|
81 |
data->options = opts;
|
|
|
82 |
data->depthswitch = depthswitch;
|
74 |
}
|
83 |
}
|
75 |
}
|
84 |
}
|
76 |
|
85 |
|
77 |
string FsTreeWalker::getReason()
|
86 |
string FsTreeWalker::getReason()
|
78 |
{
|
87 |
{
|
|
... |
|
... |
150 |
}
|
159 |
}
|
151 |
}
|
160 |
}
|
152 |
return false;
|
161 |
return false;
|
153 |
}
|
162 |
}
|
154 |
|
163 |
|
|
|
164 |
static inline int slashcount(const string& p)
|
|
|
165 |
{
|
|
|
166 |
int n = 0;
|
|
|
167 |
for (unsigned int i = 0; i < p.size(); i++)
|
|
|
168 |
if (p[i] == '/')
|
|
|
169 |
n++;
|
|
|
170 |
return n;
|
|
|
171 |
}
|
|
|
172 |
|
155 |
FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
173 |
FsTreeWalker::Status FsTreeWalker::walk(const string& _top,
|
156 |
FsTreeWalkerCB& cb)
|
174 |
FsTreeWalkerCB& cb)
|
157 |
{
|
175 |
{
|
158 |
string top = (data->options & FtwNoCanon) ? _top : path_canon(_top);
|
176 |
string top = (data->options & FtwNoCanon) ? _top : path_canon(_top);
|
159 |
|
177 |
|
|
|
178 |
if ((data->options & FtwTravMask) == 0) {
|
|
|
179 |
data->options |= FtwTravNatural;
|
|
|
180 |
}
|
|
|
181 |
|
|
|
182 |
int basedepth = slashcount(top); // Only used for breadthThenDepth
|
|
|
183 |
|
160 |
struct stat st;
|
184 |
struct stat st;
|
161 |
int statret;
|
|
|
162 |
// We always follow symlinks at this point. Makes more sense.
|
185 |
// We always follow symlinks at this point. Makes more sense.
|
163 |
statret = stat(top.c_str(), &st);
|
186 |
if (stat(top.c_str(), &st) == -1) {
|
164 |
if (statret == -1) {
|
|
|
165 |
data->logsyserr("stat", top);
|
187 |
data->logsyserr("stat", top);
|
166 |
return FtwError;
|
188 |
return FtwError;
|
167 |
}
|
189 |
}
|
|
|
190 |
|
|
|
191 |
// Recursive version, using the call stack to store state. iwalk
|
|
|
192 |
// will process files and recursively descend into subdirs in
|
|
|
193 |
// physical order of the current directory.
|
|
|
194 |
if ((data->options & FtwTravMask) == FtwTravNatural) {
|
168 |
return iwalk(top, &st, cb);
|
195 |
return iwalk(top, &st, cb);
|
|
|
196 |
}
|
|
|
197 |
|
|
|
198 |
// Breadth first of filesThenDirs semi-depth first order
|
|
|
199 |
// Managing lists of directories to be visited later, in breadth or
|
|
|
200 |
// depth order. Null marker are inserted in the list to indicate
|
|
|
201 |
// father directory changes (avoids computing parents all the time).
|
|
|
202 |
data->dirs.push_back(top);
|
|
|
203 |
Status status;
|
|
|
204 |
while (!data->dirs.empty()) {
|
|
|
205 |
string dir, nfather;
|
|
|
206 |
if (data->options & (FtwTravBreadth|FtwTravBreadthThenDepth)) {
|
|
|
207 |
// Breadth first, pop and process an older dir at the
|
|
|
208 |
// front of the list. This will add any child dirs at the
|
|
|
209 |
// back
|
|
|
210 |
dir = data->dirs.front();
|
|
|
211 |
data->dirs.pop_front();
|
|
|
212 |
if (dir == "") {
|
|
|
213 |
// Father change marker.
|
|
|
214 |
if (data->dirs.empty())
|
|
|
215 |
break;
|
|
|
216 |
dir = data->dirs.front();
|
|
|
217 |
data->dirs.pop_front();
|
|
|
218 |
nfather = path_getfather(dir);
|
|
|
219 |
if (data->options & FtwTravBreadthThenDepth) {
|
|
|
220 |
// Check if new depth warrants switch to depth first
|
|
|
221 |
// traversal (will happen on next loop iteration).
|
|
|
222 |
int curdepth = slashcount(dir) - basedepth;
|
|
|
223 |
if (curdepth >= data->depthswitch) {
|
|
|
224 |
//fprintf(stderr, "SWITCHING TO DEPTH FIRST\n");
|
|
|
225 |
data->options &= ~FtwTravMask;
|
|
|
226 |
data->options |= FtwTravFilesThenDirs;
|
|
|
227 |
}
|
|
|
228 |
}
|
|
|
229 |
}
|
|
|
230 |
} else {
|
|
|
231 |
// Depth first, pop and process latest dir
|
|
|
232 |
dir = data->dirs.back();
|
|
|
233 |
data->dirs.pop_back();
|
|
|
234 |
if (dir == "") {
|
|
|
235 |
// Father change marker.
|
|
|
236 |
if (data->dirs.empty())
|
|
|
237 |
break;
|
|
|
238 |
dir = data->dirs.back();
|
|
|
239 |
data->dirs.pop_back();
|
|
|
240 |
nfather = path_getfather(dir);
|
|
|
241 |
}
|
|
|
242 |
}
|
|
|
243 |
|
|
|
244 |
// If changing parent directory, advise our user.
|
|
|
245 |
if (!nfather.empty()) {
|
|
|
246 |
if (stat(nfather.c_str(), &st) == -1) {
|
|
|
247 |
data->logsyserr("stat", nfather);
|
|
|
248 |
return FtwError;
|
|
|
249 |
}
|
|
|
250 |
if ((status = cb.processone(nfather, &st, FtwDirReturn)) &
|
|
|
251 |
(FtwStop|FtwError)) {
|
|
|
252 |
return status;
|
|
|
253 |
}
|
|
|
254 |
}
|
|
|
255 |
|
|
|
256 |
if (stat(dir.c_str(), &st) == -1) {
|
|
|
257 |
data->logsyserr("stat", dir);
|
|
|
258 |
return FtwError;
|
|
|
259 |
}
|
|
|
260 |
// iwalk will not recurse in this case, just process file entries
|
|
|
261 |
// and append subdir entries to the list.
|
|
|
262 |
status = iwalk(dir, &st, cb);
|
|
|
263 |
if (status != FtwOk)
|
|
|
264 |
return status;
|
|
|
265 |
}
|
|
|
266 |
return FtwOk;
|
169 |
}
|
267 |
}
|
170 |
|
268 |
|
171 |
// Note that the 'norecurse' flag is handled as part of the directory read.
|
269 |
// Note that the 'norecurse' flag is handled as part of the directory read.
|
172 |
// This means that we always go into the top 'walk()' parameter if it is a
|
270 |
// This means that we always go into the top 'walk()' parameter if it is a
|
173 |
// directory, even if norecurse is set. Bug or Feature ?
|
271 |
// directory, even if norecurse is set. Bug or Feature ?
|
174 |
FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
272 |
FsTreeWalker::Status FsTreeWalker::iwalk(const string &top,
|
175 |
struct stat *stp,
|
273 |
struct stat *stp,
|
176 |
FsTreeWalkerCB& cb)
|
274 |
FsTreeWalkerCB& cb)
|
177 |
{
|
275 |
{
|
178 |
Status status = FtwOk;
|
276 |
Status status = FtwOk;
|
|
|
277 |
bool nullpush = false;
|
179 |
|
278 |
|
180 |
// Handle the parameter
|
279 |
// Tell user to process the top entry itself
|
181 |
if (S_ISDIR(stp->st_mode)) {
|
280 |
if (S_ISDIR(stp->st_mode)) {
|
182 |
if ((status = cb.processone(top, stp, FtwDirEnter)) &
|
281 |
if ((status = cb.processone(top, stp, FtwDirEnter)) &
|
183 |
(FtwStop|FtwError)) {
|
282 |
(FtwStop|FtwError)) {
|
184 |
return status;
|
283 |
return status;
|
185 |
}
|
284 |
}
|
|
... |
|
... |
203 |
}
|
302 |
}
|
204 |
}
|
303 |
}
|
205 |
|
304 |
|
206 |
struct dirent *ent;
|
305 |
struct dirent *ent;
|
207 |
while ((ent = readdir(d)) != 0) {
|
306 |
while ((ent = readdir(d)) != 0) {
|
|
|
307 |
string fn;
|
|
|
308 |
struct stat st;
|
208 |
// Skip . and ..
|
309 |
// Skip . and ..
|
209 |
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
310 |
if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
|
210 |
continue;
|
311 |
continue;
|
211 |
|
312 |
|
212 |
// Skipped file names match ?
|
313 |
// Skipped file names match ?
|
213 |
if (!data->skippedNames.empty()) {
|
314 |
if (!data->skippedNames.empty()) {
|
214 |
if (inSkippedNames(ent->d_name))
|
315 |
if (inSkippedNames(ent->d_name))
|
215 |
goto skip;
|
316 |
continue;
|
216 |
}
|
317 |
}
|
217 |
|
318 |
|
218 |
{
|
|
|
219 |
string fn = path_cat(top, ent->d_name);
|
319 |
fn = path_cat(top, ent->d_name);
|
220 |
|
|
|
221 |
struct stat st;
|
|
|
222 |
int statret = (data->options & FtwFollow) ? stat(fn.c_str(), &st) :
|
320 |
int statret = (data->options & FtwFollow) ? stat(fn.c_str(), &st) :
|
223 |
lstat(fn.c_str(), &st);
|
321 |
lstat(fn.c_str(), &st);
|
224 |
if (statret == -1) {
|
322 |
if (statret == -1) {
|
225 |
data->logsyserr("stat", fn);
|
323 |
data->logsyserr("stat", fn);
|
226 |
continue;
|
324 |
continue;
|
227 |
}
|
325 |
}
|
228 |
if (!data->skippedPaths.empty()) {
|
326 |
if (!data->skippedPaths.empty()) {
|
229 |
// We do not check the ancestors. This means that you can have
|
327 |
// We do not check the ancestors. This means that you can have
|
230 |
// a topdirs member under a skippedPath, to index a portion of
|
328 |
// a topdirs member under a skippedPath, to index a portion of
|
231 |
// an ignored area. This is the way it had always worked, but
|
329 |
// an ignored area. This is the way it had always worked, but
|
232 |
// this was broken by 1.13.00 and the systematic use of
|
330 |
// this was broken by 1.13.00 and the systematic use of
|
233 |
// FNM_LEADING_DIR
|
331 |
// FNM_LEADING_DIR
|
234 |
if (inSkippedPaths(fn, false))
|
332 |
if (inSkippedPaths(fn, false))
|
235 |
goto skip;
|
333 |
continue;
|
236 |
}
|
334 |
}
|
237 |
|
335 |
|
238 |
if (S_ISDIR(st.st_mode)) {
|
336 |
if (S_ISDIR(st.st_mode)) {
|
239 |
if (data->options & FtwNoRecurse) {
|
337 |
if (data->options & FtwNoRecurse) {
|
240 |
status = cb.processone(fn, &st, FtwDirEnter);
|
338 |
status = cb.processone(fn, &st, FtwDirEnter);
|
241 |
} else {
|
339 |
} else {
|
|
|
340 |
if (data->options & FtwTravNatural) {
|
242 |
status = iwalk(fn, &st, cb);
|
341 |
status = iwalk(fn, &st, cb);
|
243 |
}
|
342 |
} else {
|
|
|
343 |
// If first subdir, push marker to separate
|
|
|
344 |
// from entries for other dir. This is to help
|
|
|
345 |
// with generating DirReturn callbacks
|
|
|
346 |
if (!nullpush) {
|
|
|
347 |
if (!data->dirs.empty() && data->dirs.back() != "")
|
|
|
348 |
data->dirs.push_back("");
|
|
|
349 |
nullpush = true;
|
|
|
350 |
}
|
|
|
351 |
data->dirs.push_back(fn);
|
|
|
352 |
continue;
|
|
|
353 |
}
|
|
|
354 |
}
|
|
|
355 |
// Note: only recursive case gets here.
|
244 |
if (status & (FtwStop|FtwError))
|
356 |
if (status & (FtwStop|FtwError))
|
245 |
goto out;
|
357 |
goto out;
|
246 |
|
|
|
247 |
// No DirReturn call when not recursing
|
|
|
248 |
if (!(data->options & FtwNoRecurse))
|
358 |
if (!(data->options & FtwNoRecurse))
|
249 |
if ((status = cb.processone(top, &st, FtwDirReturn))
|
359 |
if ((status = cb.processone(top, &st, FtwDirReturn))
|
250 |
& (FtwStop|FtwError))
|
360 |
& (FtwStop|FtwError))
|
251 |
goto out;
|
361 |
goto out;
|
252 |
} else if (S_ISREG(st.st_mode)) {
|
362 |
} else if (S_ISREG(st.st_mode)) {
|
253 |
if ((status = cb.processone(fn, &st, FtwRegular)) &
|
363 |
if ((status = cb.processone(fn, &st, FtwRegular)) &
|
254 |
(FtwStop|FtwError)) {
|
364 |
(FtwStop|FtwError)) {
|
255 |
goto out;
|
365 |
goto out;
|
256 |
}
|
366 |
}
|
257 |
}
|
367 |
}
|
258 |
}
|
|
|
259 |
|
|
|
260 |
skip: ;
|
|
|
261 |
|
|
|
262 |
// We skip other file types (devices etc...)
|
368 |
// We ignore other file types (devices etc...)
|
263 |
}
|
369 |
} // readdir loop
|
264 |
|
370 |
|
265 |
out:
|
371 |
out:
|
266 |
if (d)
|
372 |
if (d)
|
267 |
closedir(d);
|
373 |
closedir(d);
|
268 |
return status;
|
374 |
return status;
|
|
... |
|
... |
281 |
#define OPT_MOINS 0x1
|
387 |
#define OPT_MOINS 0x1
|
282 |
#define OPT_p 0x2
|
388 |
#define OPT_p 0x2
|
283 |
#define OPT_P 0x4
|
389 |
#define OPT_P 0x4
|
284 |
#define OPT_r 0x8
|
390 |
#define OPT_r 0x8
|
285 |
#define OPT_c 0x10
|
391 |
#define OPT_c 0x10
|
|
|
392 |
#define OPT_b 0x20
|
|
|
393 |
#define OPT_d 0x40
|
|
|
394 |
#define OPT_m 0x80
|
286 |
|
395 |
|
287 |
class myCB : public FsTreeWalkerCB {
|
396 |
class myCB : public FsTreeWalkerCB {
|
288 |
public:
|
397 |
public:
|
289 |
FsTreeWalker::Status processone(const string &path,
|
398 |
FsTreeWalker::Status processone(const string &path,
|
290 |
const struct stat *st,
|
399 |
const struct stat *st,
|
|
... |
|
... |
304 |
}
|
413 |
}
|
305 |
};
|
414 |
};
|
306 |
|
415 |
|
307 |
static const char *thisprog;
|
416 |
static const char *thisprog;
|
308 |
|
417 |
|
|
|
418 |
// Note that breadth first sorting is relatively expensive: less inode
|
|
|
419 |
// locality, more disk usage (and also more user memory usage, does not appear
|
|
|
420 |
// here):
|
|
|
421 |
// y$ time trfstreewalk -d / 2>&1 > /tmp/alld
|
|
|
422 |
// real 15m32.520s user 0m4.815s sys 0m33.014s
|
|
|
423 |
// y$ time trfstreewalk -b / 2>&1 > /tmp/allb
|
|
|
424 |
// real 17m3.380s user 0m4.658s sys 0m34.473s
|
|
|
425 |
// y$ wc /tmp/allb /tmp/alld
|
|
|
426 |
// 2618155 3208127 221619857 /tmp/allb
|
|
|
427 |
// 2618154 3208126 221619847 /tmp/alld
|
|
|
428 |
// (diff due to changes in /proc)
|
|
|
429 |
|
309 |
static char usage [] =
|
430 |
static char usage [] =
|
310 |
"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] topdir\n"
|
431 |
"trfstreewalk [-p pattern] [-P ignpath] [-r] [-c] topdir\n"
|
311 |
" -r : norecurse\n"
|
432 |
" -r : norecurse\n"
|
312 |
" -c : no path canonification\n"
|
433 |
" -c : no path canonification\n"
|
|
|
434 |
" -b : use breadth first walk\n"
|
|
|
435 |
" -d : use almost depth first (dir files, then subdirs)\n"
|
|
|
436 |
" -m : use breadth up to 4 deep then switch to -d\n"
|
313 |
;
|
437 |
;
|
314 |
static void
|
438 |
static void
|
315 |
Usage(void)
|
439 |
Usage(void)
|
316 |
{
|
440 |
{
|
317 |
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
|
441 |
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
|
|
... |
|
... |
330 |
if (!(**argv))
|
454 |
if (!(**argv))
|
331 |
/* Cas du "adb - core" */
|
455 |
/* Cas du "adb - core" */
|
332 |
Usage();
|
456 |
Usage();
|
333 |
while (**argv)
|
457 |
while (**argv)
|
334 |
switch (*(*argv)++) {
|
458 |
switch (*(*argv)++) {
|
|
|
459 |
case 'b': op_flags |= OPT_b; break;
|
|
|
460 |
case 'c': op_flags |= OPT_c; break;
|
|
|
461 |
case 'd': op_flags |= OPT_d; break;
|
|
|
462 |
case 'm': op_flags |= OPT_m; break;
|
335 |
case 'r': op_flags |= OPT_r; break;
|
463 |
case 'r': op_flags |= OPT_r; break;
|
336 |
case 'c': op_flags |= OPT_c; break;
|
|
|
337 |
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
464 |
case 'p': op_flags |= OPT_p; if (argc < 2) Usage();
|
338 |
patterns.push_back(*(++argv));
|
465 |
patterns.push_back(*(++argv));
|
339 |
argc--;
|
466 |
argc--;
|
340 |
goto b1;
|
467 |
goto b1;
|
341 |
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
|
468 |
case 'P': op_flags |= OPT_P; if (argc < 2) Usage();
|
|
... |
|
... |
354 |
int opt = 0;
|
481 |
int opt = 0;
|
355 |
if (op_flags & OPT_r)
|
482 |
if (op_flags & OPT_r)
|
356 |
opt |= FsTreeWalker::FtwNoRecurse;
|
483 |
opt |= FsTreeWalker::FtwNoRecurse;
|
357 |
if (op_flags & OPT_c)
|
484 |
if (op_flags & OPT_c)
|
358 |
opt |= FsTreeWalker::FtwNoCanon;
|
485 |
opt |= FsTreeWalker::FtwNoCanon;
|
|
|
486 |
|
|
|
487 |
if (op_flags & OPT_b)
|
|
|
488 |
opt |= FsTreeWalker::FtwTravBreadth;
|
|
|
489 |
else if (op_flags & OPT_d)
|
|
|
490 |
opt |= FsTreeWalker::FtwTravFilesThenDirs;
|
|
|
491 |
else if (op_flags & OPT_m)
|
|
|
492 |
opt |= FsTreeWalker::FtwTravBreadthThenDepth;
|
|
|
493 |
|
359 |
FsTreeWalker walker(opt);
|
494 |
FsTreeWalker walker(opt);
|
360 |
walker.setSkippedNames(patterns);
|
495 |
walker.setSkippedNames(patterns);
|
361 |
walker.setSkippedPaths(paths);
|
496 |
walker.setSkippedPaths(paths);
|
362 |
myCB cb;
|
497 |
myCB cb;
|
363 |
walker.walk(topdir, cb);
|
498 |
walker.walk(topdir, cb);
|