|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
74 |
const string udi_prefix("Q");
|
74 |
const string udi_prefix("Q");
|
75 |
const string parent_prefix("F");
|
75 |
const string parent_prefix("F");
|
76 |
|
76 |
|
77 |
// Special terms to mark begin/end of field (for anchored searches), and
|
77 |
// Special terms to mark begin/end of field (for anchored searches), and
|
78 |
// page breaks
|
78 |
// page breaks
|
79 |
#ifdef RCL_INDEX_STRIPCHARS
|
|
|
80 |
const string start_of_field_term = "XXST";
|
|
|
81 |
const string end_of_field_term = "XXND";
|
|
|
82 |
static const string page_break_term = "XXPG";
|
|
|
83 |
#else
|
|
|
84 |
string start_of_field_term;
|
79 |
string start_of_field_term;
|
85 |
string end_of_field_term;
|
80 |
string end_of_field_term;
|
86 |
const string page_break_term = "XXPG/";
|
81 |
const string page_break_term = "XXPG/";
|
87 |
#endif
|
|
|
88 |
|
82 |
|
89 |
// Field name for the unsplit file name. Has to exist in the field file
|
83 |
// Field name for the unsplit file name. Has to exist in the field file
|
90 |
// because of usage in termmatch()
|
84 |
// because of usage in termmatch()
|
91 |
const string unsplitFilenameFieldName = "rclUnsplitFN";
|
85 |
const string unsplitFilenameFieldName = "rclUnsplitFN";
|
92 |
static const string unsplitfilename_prefix = "XSFS";
|
86 |
static const string unsplitfilename_prefix = "XSFS";
|
|
... |
|
... |
354 |
m_occtxtsz(0), m_occFirstCheck(1),
|
348 |
m_occtxtsz(0), m_occFirstCheck(1),
|
355 |
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
|
349 |
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
|
356 |
m_flushMb(-1), m_maxFsOccupPc(0)
|
350 |
m_flushMb(-1), m_maxFsOccupPc(0)
|
357 |
{
|
351 |
{
|
358 |
m_config = new RclConfig(*cfp);
|
352 |
m_config = new RclConfig(*cfp);
|
359 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
360 |
if (start_of_field_term.empty()) {
|
353 |
if (start_of_field_term.empty()) {
|
361 |
if (o_index_stripchars) {
|
354 |
if (o_index_stripchars) {
|
362 |
start_of_field_term = "XXST";
|
355 |
start_of_field_term = "XXST";
|
363 |
end_of_field_term = "XXND";
|
356 |
end_of_field_term = "XXND";
|
364 |
} else {
|
357 |
} else {
|
365 |
start_of_field_term = "XXST/";
|
358 |
start_of_field_term = "XXST/";
|
366 |
end_of_field_term = "XXND/";
|
359 |
end_of_field_term = "XXND/";
|
367 |
}
|
360 |
}
|
368 |
}
|
361 |
}
|
369 |
#endif
|
|
|
370 |
|
362 |
|
371 |
m_ndb = new Native(this);
|
363 |
m_ndb = new Native(this);
|
372 |
if (m_config) {
|
364 |
if (m_config) {
|
373 |
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
365 |
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
374 |
m_config->getConfParam("idxflushmb", &m_flushMb);
|
366 |
m_config->getConfParam("idxflushmb", &m_flushMb);
|
|
... |
|
... |
400 |
|
392 |
|
401 |
if (m_ndb == 0 || m_config == 0) {
|
393 |
if (m_ndb == 0 || m_config == 0) {
|
402 |
m_reason = "Null configuration or Xapian Db";
|
394 |
m_reason = "Null configuration or Xapian Db";
|
403 |
return false;
|
395 |
return false;
|
404 |
}
|
396 |
}
|
405 |
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
397 |
LOGDEB(("Db::open: m_isopen %d m_iswritable %d mode %d\n", m_ndb->m_isopen,
|
406 |
m_ndb->m_iswritable));
|
398 |
m_ndb->m_iswritable, mode));
|
407 |
|
399 |
|
408 |
if (m_ndb->m_isopen) {
|
400 |
if (m_ndb->m_isopen) {
|
409 |
// We used to return an error here but I see no reason to
|
401 |
// We used to return an error here but I see no reason to
|
410 |
if (!close())
|
402 |
if (!close())
|
411 |
return false;
|
403 |
return false;
|
|
... |
|
... |
569 |
int res = -1;
|
561 |
int res = -1;
|
570 |
if (!m_ndb || !m_ndb->m_isopen)
|
562 |
if (!m_ndb || !m_ndb->m_isopen)
|
571 |
return -1;
|
563 |
return -1;
|
572 |
|
564 |
|
573 |
string term = _term;
|
565 |
string term = _term;
|
574 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
575 |
if (o_index_stripchars)
|
566 |
if (o_index_stripchars)
|
576 |
#endif
|
|
|
577 |
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
567 |
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
578 |
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
568 |
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
579 |
return 0;
|
569 |
return 0;
|
580 |
}
|
570 |
}
|
581 |
|
571 |
|
|
... |
|
... |
849 |
if (m_ndb == 0)
|
839 |
if (m_ndb == 0)
|
850 |
return string();
|
840 |
return string();
|
851 |
|
841 |
|
852 |
string term = word;
|
842 |
string term = word;
|
853 |
|
843 |
|
854 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
855 |
if (o_index_stripchars)
|
844 |
if (o_index_stripchars)
|
856 |
#endif
|
|
|
857 |
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
845 |
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
858 |
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
846 |
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
859 |
return string();
|
847 |
return string();
|
860 |
}
|
848 |
}
|
861 |
|
849 |
|
862 |
if (!isSpellingCandidate(term))
|
850 |
if (!isSpellingCandidate(term))
|
863 |
return string();
|
851 |
return string();
|
864 |
return m_ndb->xrdb.get_spelling_suggestion(term);
|
852 |
return m_ndb->xrdb.get_spelling_suggestion(term);
|
865 |
}
|
853 |
}
|
|
... |
|
... |
901 |
TermProc *nxt = &tpidx;
|
889 |
TermProc *nxt = &tpidx;
|
902 |
TermProcStop tpstop(nxt, m_stops);nxt = &tpstop;
|
890 |
TermProcStop tpstop(nxt, m_stops);nxt = &tpstop;
|
903 |
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
|
891 |
//TermProcCommongrams tpcommon(nxt, m_stops); nxt = &tpcommon;
|
904 |
|
892 |
|
905 |
TermProcPrep tpprep(nxt);
|
893 |
TermProcPrep tpprep(nxt);
|
906 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
907 |
if (o_index_stripchars)
|
894 |
if (o_index_stripchars)
|
908 |
#endif
|
|
|
909 |
nxt = &tpprep;
|
895 |
nxt = &tpprep;
|
910 |
|
896 |
|
911 |
TextSplitDb splitter(newdocument, nxt);
|
897 |
TextSplitDb splitter(newdocument, nxt);
|
912 |
tpidx.setTSD(&splitter);
|
898 |
tpidx.setTSD(&splitter);
|
913 |
|
899 |
|