--- a/src/index/fsindexer.cpp
+++ b/src/index/fsindexer.cpp
@@ -313,7 +313,9 @@
int abslen;
if (m_config->getConfParam("idxabsmlen", &abslen))
m_db->setAbstractParams(abslen, -1, -1);
-
+
+ m_purgeCandidates.setRecord(true);
+
// We use an FsTreeWalker just for handling the skipped path/name lists
FsTreeWalker walker;
walker.setSkippedPaths(m_config->getSkippedPaths());
@@ -365,6 +367,21 @@
m_dwqueue.waitIdle();
m_db->waitUpdIdle();
#endif // IDX_THREADS
+
+ // Purge possible orphan documents
+ if (ret == true) {
+ LOGDEB(("Indexfiles: purging orphans\n"));
+ const vector<string>& purgecandidates = m_purgeCandidates.getCandidates();
+ for (vector<string>::const_iterator it = purgecandidates.begin();
+ it != purgecandidates.end(); it++) {
+ LOGDEB(("Indexfiles: purging orphans for %s\n", it->c_str()));
+ m_db->purgeOrphans(*it);
+ }
+#ifdef IDX_THREADS
+ m_db->waitUpdIdle();
+#endif // IDX_THREADS
+ }
+
LOGDEB(("FsIndexer::indexFiles: done\n"));
return ret;
}
@@ -622,6 +639,27 @@
return processonefile(m_config, fn, stp, m_localfields, m_mdreapers);
}
+// File name transcoded to utf8 for indexing. If this fails, the file
+// name won't be indexed, no big deal Note that we used to do the full
+// path here, but I ended up believing that it made more sense to use
+// only the file name The charset is used is the one from the locale.
+static string compute_utf8fn(RclConfig *config, const string& fn)
+{
+ string charset = config->getDefCharset(true);
+ string utf8fn;
+ int ercnt;
+ if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
+ LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
+ charset.c_str(), path_getsimple(fn).c_str()));
+ } else if (ercnt) {
+ LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
+ ercnt, charset.c_str(), path_getsimple(fn).c_str()));
+ }
+ LOGDEB2(("processone: fn transcoded from [%s] to [%s] (%s->%s)\n",
+ path_getsimple(fn).c_str(), utf8fn.c_str(), charset.c_str(),
+ "UTF-8"));
+ return utf8fn;
+}
FsTreeWalker::Status
FsIndexer::processonefile(RclConfig *config,
@@ -644,7 +682,8 @@
makesig(stp, sig);
string udi;
make_udi(fn, cstr_null, udi);
- bool needupdate = m_db->needUpdate(udi, sig);
+ bool existingDoc;
+ bool needupdate = m_db->needUpdate(udi, sig, &existingDoc);
if (!needupdate) {
LOGDEB0(("processone: up to date: %s\n", fn.c_str()));
@@ -673,32 +712,19 @@
}
interner.setMissingStore(m_missing);
- // File name transcoded to utf8 for indexing.
- // If this fails, the file name won't be indexed, no big deal
- // Note that we used to do the full path here, but I ended up believing
- // that it made more sense to use only the file name
- // The charset is used is the one from the locale.
- string charset = config->getDefCharset(true);
- string utf8fn; int ercnt;
- if (!transcode(path_getsimple(fn), utf8fn, charset, "UTF-8", &ercnt)) {
- LOGERR(("processone: fn transcode failure from [%s] to UTF-8: %s\n",
- charset.c_str(), path_getsimple(fn).c_str()));
- } else if (ercnt) {
- LOGDEB(("processone: fn transcode %d errors from [%s] to UTF-8: %s\n",
- ercnt, charset.c_str(), path_getsimple(fn).c_str()));
- }
- LOGDEB2(("processone: fn transcoded from [%s] to [%s] (%s->%s)\n",
- path_getsimple(fn).c_str(), utf8fn.c_str(), charset.c_str(),
- "UTF-8"));
-
- string parent_udi;
- make_udi(fn, cstr_null, parent_udi);
+ string utf8fn = compute_utf8fn(config, fn);
+
+ // parent_udi is initially the same as udi, it will be used if there
+ // are subdocs.
+ string parent_udi = udi;
+
Rcl::Doc doc;
char ascdate[30];
sprintf(ascdate, "%ld", long(stp->st_mtime));
FileInterner::Status fis = FileInterner::FIAgain;
bool hadNullIpath = false;
+ bool hadNonNullIpath = false;
while (fis == FileInterner::FIAgain) {
doc.erase();
try {
@@ -708,7 +734,7 @@
return FsTreeWalker::FtwStop;
}
- // Index at least the file name even if there was an error.
+ // We index at least the file name even if there was an error.
// We'll change the signature to ensure that the indexing will
// be retried every time.
@@ -718,7 +744,10 @@
hadNullIpath = true;
if (m_havemdreapers)
reapmetadata(mdreapers, fn, doc);
- }
+ } else {
+ hadNonNullIpath = true;
+ make_udi(fn, doc.ipath, udi);
+ }
// Set file name, mod time and url if not done by filter
if (doc.fmtime.empty())
@@ -732,11 +761,9 @@
char cbuf[100];
sprintf(cbuf, OFFTPC, stp->st_size);
doc.pcbytes = cbuf;
- // Document signature for up to date checks: concatenate
- // m/ctime and size. Looking for changes only, no need to
- // parseback so no need for reversible formatting. Also set,
- // but never used, for subdocs.
- makesig(stp, doc.sig);
+ // Document signature for up to date checks. All subdocs inherit the
+ // file's.
+ doc.sig = sig;
// If there was an error, ensure indexing will be
// retried. This is for the once missing, later installed
@@ -750,14 +777,13 @@
// Possibly add fields from local config
if (m_havelocalfields)
setlocalfields(localfields, doc);
+
// Add document to database. If there is an ipath, add it as a children
// of the file document.
- string udi;
- make_udi(fn, doc.ipath, udi);
-
#ifdef IDX_THREADS
if (m_haveSplitQ) {
- DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ? cstr_null : parent_udi, doc);
+ DbUpdTask *tp = new DbUpdTask(udi, doc.ipath.empty() ?
+ cstr_null : parent_udi, doc);
if (!m_dwqueue.put(tp)) {
LOGERR(("processonefile: wqueue.put failed\n"));
return FsTreeWalker::FtwError;
@@ -787,6 +813,15 @@
return FsTreeWalker::FtwStop;
}
}
+ }
+
+ // If this doc existed and it's a container, recording for
+ // possible subdoc purge (this will be used only if we don't do a
+ // db-wide purge, e.g. if we're called from indexfiles()).
+ LOGDEB2(("processOnefile: existingDoc %d hadNonNullIpath %d\n",
+ existingDoc, hadNonNullIpath));
+ if (existingDoc && hadNonNullIpath) {
+ m_purgeCandidates.record(parent_udi);
}
// If we had no instance with a null ipath, we create an empty
@@ -806,8 +841,7 @@
char cbuf[100];
sprintf(cbuf, OFFTPC, stp->st_size);
fileDoc.pcbytes = cbuf;
- // Document signature for up to date checks.
- makesig(stp, fileDoc.sig);
+ fileDoc.sig = sig;
#ifdef IDX_THREADS
if (m_haveSplitQ) {