|
a/src/rcldb/rcldb.h |
|
b/src/rcldb/rcldb.h |
|
... |
|
... |
245 |
* Side-effect: set the existence flag for the file document
|
245 |
* Side-effect: set the existence flag for the file document
|
246 |
* and all subdocs if any (for later use by 'purge()')
|
246 |
* and all subdocs if any (for later use by 'purge()')
|
247 |
*/
|
247 |
*/
|
248 |
bool needUpdate(const string &udi, const string& sig);
|
248 |
bool needUpdate(const string &udi, const string& sig);
|
249 |
|
249 |
|
250 |
/** Add or update document. The Doc class should have been filled as much as
|
250 |
/** Add or update document identified by unique identifier.
|
|
|
251 |
* @param config Config object to use. Can be the same as the member config
|
|
|
252 |
* or a clone, to avoid sharing when called in multithread context.
|
|
|
253 |
* @param udi the Unique Document Identifier, which is opaque to us.
|
|
|
254 |
* Maximum size 150 bytes.
|
|
|
255 |
* @param parent_udi the UDI for the container document. In case of complex
|
|
|
256 |
* embedding, this is not always the immediate parent but the UDI for
|
|
|
257 |
* the container file (which may be a farther ancestor). It is
|
|
|
258 |
* used for purging subdocuments when a file ceases to exist and
|
|
|
259 |
* to set the existence flags of all subdocuments of a container
|
|
|
260 |
* that is found to be up to date. In other words, the
|
|
|
261 |
* parent_udi is the UDI for the ancestor of the document which
|
|
|
262 |
* is subject to needUpdate() and physical existence tests (some
|
|
|
263 |
* kind of file equivalent). Empty for top-level docs. Should
|
|
|
264 |
* probably be renamed container_udi.
|
|
|
265 |
* @param doc container for document data. Should have been filled as
|
251 |
* possible depending on the document type. parent_udi is only
|
266 |
* much as possible depending on the document type.
|
252 |
* use for subdocs, else set it to empty */
|
267 |
*/
|
253 |
bool addOrUpdate(RclConfig *config, const string &udi,
|
268 |
bool addOrUpdate(RclConfig *config, const string &udi,
|
254 |
const string &parent_udi, Doc &doc);
|
269 |
const string &parent_udi, Doc &doc);
|
255 |
#ifdef IDX_THREADS
|
270 |
#ifdef IDX_THREADS
|
256 |
void waitUpdIdle();
|
271 |
void waitUpdIdle();
|
257 |
#endif
|
272 |
#endif
|
|
... |
|
... |
259 |
/** Delete document(s) for given UDI, including subdocs */
|
274 |
/** Delete document(s) for given UDI, including subdocs */
|
260 |
bool purgeFile(const string &udi, bool *existed = 0);
|
275 |
bool purgeFile(const string &udi, bool *existed = 0);
|
261 |
|
276 |
|
262 |
/** Remove documents that no longer exist in the file system. This
|
277 |
/** Remove documents that no longer exist in the file system. This
|
263 |
* depends on the update map, which is built during
|
278 |
* depends on the update map, which is built during
|
264 |
* indexing (needUpdate()).
|
279 |
* indexing (needUpdate() / addOrUpdate()).
|
265 |
*
|
280 |
*
|
266 |
* This should only be called after a full walk of
|
281 |
* This should only be called after a full walk of
|
267 |
* the file system, else the update map will not be complete, and
|
282 |
* the file system, else the update map will not be complete, and
|
268 |
* many documents will be deleted that shouldn't, which is why this
|
283 |
* many documents will be deleted that shouldn't, which is why this
|
269 |
* has to be called externally, rcldb can't know if the indexing
|
284 |
* has to be called externally, rcldb can't know if the indexing
|
270 |
* pass was complete or partial.
|
285 |
* pass was complete or partial.
|
271 |
*/
|
286 |
*/
|
272 |
bool purge();
|
287 |
bool purge();
|
273 |
|
288 |
|
274 |
/** Create stem expansion database for given language. */
|
289 |
/** Create stem expansion databases for the given languages. */
|
275 |
bool createStemDbs(const std::vector<std::string> &langs);
|
290 |
bool createStemDbs(const std::vector<std::string> &langs);
|
276 |
/** Delete stem expansion database for given language. */
|
291 |
/** Delete stem expansion database for given language. */
|
277 |
bool deleteStemDb(const string &lang);
|
292 |
bool deleteStemDb(const string &lang);
|
278 |
|
293 |
|
279 |
/* Query-related methods ************************************/
|
294 |
/* Query-related methods ************************************/
|
280 |
|
295 |
|
281 |
/** Return total docs in db */
|
296 |
/** Return total docs in db */
|
282 |
int docCnt();
|
297 |
int docCnt();
|
283 |
/** Return count of docs which have an occurrence of term */
|
298 |
/** Return count of docs which have an occurrence of term */
|
284 |
int termDocCnt(const string& term);
|
299 |
int termDocCnt(const string& term);
|
285 |
/** Add extra database for querying */
|
300 |
/** Add extra Xapian database for querying.
|
|
|
301 |
* @param dir must point to something which can be passed as parameter
|
|
|
302 |
* to a Xapian::Database constructor (directory or stub).
|
|
|
303 |
*/
|
286 |
bool addQueryDb(const string &dir);
|
304 |
bool addQueryDb(const string &dir);
|
287 |
/** Remove extra database. if dir == "", remove all. */
|
305 |
/** Remove extra database. If dir == "", remove all. */
|
288 |
bool rmQueryDb(const string &dir);
|
306 |
bool rmQueryDb(const string &dir);
|
289 |
/** Look where the doc result comes from.
|
307 |
/** Look where the doc result comes from.
|
|
|
308 |
* @param doc must come from a db query so that "opaque" xdocid is set.
|
290 |
* @return: 0 main index, (size_t)-1 don't know,
|
309 |
* @return: 0 main index, (size_t)-1 don't know,
|
291 |
* other: order of database in add_database() sequence.
|
310 |
* other: order of database in add_database() sequence.
|
292 |
*/
|
311 |
*/
|
293 |
size_t whatDbIdx(const Doc& doc);
|
312 |
size_t whatDbIdx(const Doc& doc);
|
294 |
/** Tell if directory seems to hold xapian db */
|
313 |
/** Tell if directory seems to hold a Xapian db */
|
|
... |
|
... |
310 |
* in the TermMatchResult header
|
329 |
* in the TermMatchResult header
|
311 |
*/
|
330 |
*/
|
312 |
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
|
331 |
enum MatchType {ET_WILD, ET_REGEXP, ET_STEM};
|
313 |
bool termMatch(MatchType typ, const string &lang, const string &term,
|
332 |
bool termMatch(MatchType typ, const string &lang, const string &term,
|
314 |
TermMatchResult& result, int max = -1,
|
333 |
TermMatchResult& result, int max = -1,
|
315 |
const string& field = cstr_null
|
334 |
const string& field = cstr_null);
|
316 |
);
|
|
|
317 |
/** Return min and max years for doc mod times in db */
|
335 |
/** Return min and max years for doc mod times in db */
|
318 |
bool maxYearSpan(int *minyear, int *maxyear);
|
336 |
bool maxYearSpan(int *minyear, int *maxyear);
|
319 |
|
337 |
|
320 |
/** Wildcard expansion specific to file names. Internal/sdata use only */
|
338 |
/** Wildcard expansion specific to file names. Internal/sdata use only */
|
321 |
bool filenameWildExp(const string& exp, vector<string>& names, int max);
|
339 |
bool filenameWildExp(const string& exp, vector<string>& names, int max);
|
|
... |
|
... |
330 |
{
|
348 |
{
|
331 |
return m_synthAbsLen;
|
349 |
return m_synthAbsLen;
|
332 |
}
|
350 |
}
|
333 |
/** Get document for given udi
|
351 |
/** Get document for given udi
|
334 |
*
|
352 |
*
|
335 |
* Used by the 'history' feature (and nothing else?)
|
353 |
* Used by the 'history' feature, and to retrieve ancestor documents.
|
336 |
*/
|
354 |
*/
|
337 |
bool getDoc(const string &udi, Doc &doc);
|
355 |
bool getDoc(const string &udi, Doc &doc);
|
338 |
|
356 |
|
339 |
/* The following are mainly for the aspell module */
|
357 |
/* The following are mainly for the aspell module */
|
340 |
/** Whole term list walking. */
|
358 |
/** Whole term list walking. */
|