|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
1058 |
// gets added to basepos in addition to the inter-section increment
|
1058 |
// gets added to basepos in addition to the inter-section increment
|
1059 |
// to compute the first position of the next section.
|
1059 |
// to compute the first position of the next section.
|
1060 |
Xapian::termpos curpos;
|
1060 |
Xapian::termpos curpos;
|
1061 |
|
1061 |
|
1062 |
TextSplitDb(Xapian::Document &d, TermProc *prc)
|
1062 |
TextSplitDb(Xapian::Document &d, TermProc *prc)
|
1063 |
: TextSplitP(prc),
|
1063 |
: TextSplitP(prc), doc(d), basepos(1), curpos(0)
|
1064 |
doc(d), basepos(1), curpos(0), wdfinc(1)
|
|
|
1065 |
{}
|
1064 |
{}
|
1066 |
|
1065 |
|
1067 |
// Reimplement text_to_words to insert the begin and end anchor terms.
|
1066 |
// Reimplement text_to_words to insert the begin and end anchor terms.
|
1068 |
virtual bool text_to_words(const string &in)
|
1067 |
virtual bool text_to_words(const string &in)
|
1069 |
{
|
1068 |
{
|
1070 |
bool ret = false;
|
1069 |
bool ret = false;
|
1071 |
string ermsg;
|
1070 |
string ermsg;
|
1072 |
|
1071 |
|
1073 |
try {
|
1072 |
try {
|
1074 |
// Index the possibly prefixed start term.
|
1073 |
// Index the possibly prefixed start term.
|
1075 |
doc.add_posting(prefix + start_of_field_term, basepos, wdfinc);
|
1074 |
doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
|
1076 |
++basepos;
|
1075 |
++basepos;
|
1077 |
} XCATCHERROR(ermsg);
|
1076 |
} XCATCHERROR(ermsg);
|
1078 |
if (!ermsg.empty()) {
|
1077 |
if (!ermsg.empty()) {
|
1079 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
1078 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
1080 |
goto out;
|
1079 |
goto out;
|
|
... |
|
... |
1085 |
goto out;
|
1084 |
goto out;
|
1086 |
}
|
1085 |
}
|
1087 |
|
1086 |
|
1088 |
try {
|
1087 |
try {
|
1089 |
// Index the possibly prefixed end term.
|
1088 |
// Index the possibly prefixed end term.
|
1090 |
doc.add_posting(prefix + end_of_field_term, basepos + curpos + 1,
|
1089 |
doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
|
1091 |
wdfinc);
|
1090 |
ft.wdfinc);
|
1092 |
++basepos;
|
1091 |
++basepos;
|
1093 |
} XCATCHERROR(ermsg);
|
1092 |
} XCATCHERROR(ermsg);
|
1094 |
if (!ermsg.empty()) {
|
1093 |
if (!ermsg.empty()) {
|
1095 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
1094 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
1096 |
goto out;
|
1095 |
goto out;
|
|
... |
|
... |
1101 |
out:
|
1100 |
out:
|
1102 |
basepos += curpos + 100;
|
1101 |
basepos += curpos + 100;
|
1103 |
return true;
|
1102 |
return true;
|
1104 |
}
|
1103 |
}
|
1105 |
|
1104 |
|
1106 |
void setprefix(const string& pref)
|
1105 |
void setTraits(const FieldTraits& ftp)
|
1107 |
{
|
1106 |
{
|
1108 |
if (pref.empty())
|
1107 |
ft = ftp;
|
1109 |
prefix.clear();
|
|
|
1110 |
else
|
|
|
1111 |
prefix = wrap_prefix(pref);
|
|
|
1112 |
}
|
|
|
1113 |
|
|
|
1114 |
void setwdfinc(int i)
|
|
|
1115 |
{
|
|
|
1116 |
wdfinc = i;
|
|
|
1117 |
}
|
1108 |
}
|
1118 |
|
1109 |
|
1119 |
friend class TermProcIdx;
|
1110 |
friend class TermProcIdx;
|
1120 |
|
1111 |
|
1121 |
private:
|
1112 |
private:
|
1122 |
// If prefix is set, we also add a posting for the prefixed terms
|
1113 |
FieldTraits ft;
|
1123 |
// (ie: for titles, add postings for both "term" and "Sterm")
|
|
|
1124 |
string prefix;
|
|
|
1125 |
// Some fields have more weight
|
|
|
1126 |
int wdfinc;
|
|
|
1127 |
};
|
1114 |
};
|
1128 |
|
1115 |
|
1129 |
class TermProcIdx : public TermProc {
|
1116 |
class TermProcIdx : public TermProc {
|
1130 |
public:
|
1117 |
public:
|
1131 |
TermProcIdx() : TermProc(0), m_ts(0), m_lastpagepos(0), m_pageincr(0) {}
|
1118 |
TermProcIdx() : TermProc(0), m_ts(0), m_lastpagepos(0), m_pageincr(0) {}
|
|
... |
|
... |
1143 |
return true;
|
1130 |
return true;
|
1144 |
string ermsg;
|
1131 |
string ermsg;
|
1145 |
try {
|
1132 |
try {
|
1146 |
// Index without prefix, using the field-specific weighting
|
1133 |
// Index without prefix, using the field-specific weighting
|
1147 |
LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
|
1134 |
LOGDEB1(("Emitting term at %d : [%s]\n", pos, term.c_str()));
|
|
|
1135 |
if (!m_ts->ft.pfxonly)
|
1148 |
m_ts->doc.add_posting(term, pos, m_ts->wdfinc);
|
1136 |
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
|
|
1137 |
|
1149 |
#ifdef TESTING_XAPIAN_SPELL
|
1138 |
#ifdef TESTING_XAPIAN_SPELL
|
1150 |
if (Db::isSpellingCandidate(term)) {
|
1139 |
if (Db::isSpellingCandidate(term)) {
|
1151 |
m_ts->db.add_spelling(term);
|
1140 |
m_ts->db.add_spelling(term);
|
1152 |
}
|
1141 |
}
|
1153 |
#endif
|
1142 |
#endif
|
1154 |
// Index the prefixed term.
|
1143 |
// Index the prefixed term.
|
1155 |
if (!m_ts->prefix.empty()) {
|
1144 |
if (!m_ts->ft.pfx.empty()) {
|
1156 |
m_ts->doc.add_posting(m_ts->prefix + term, pos, m_ts->wdfinc);
|
1145 |
m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
|
|
|
1146 |
m_ts->ft.wdfinc);
|
1157 |
}
|
1147 |
}
|
1158 |
return true;
|
1148 |
return true;
|
1159 |
} XCATCHERROR(ermsg);
|
1149 |
} XCATCHERROR(ermsg);
|
1160 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
1150 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
1161 |
return false;
|
1151 |
return false;
|
|
... |
|
... |
1166 |
if (pos < int(baseTextPosition)) {
|
1156 |
if (pos < int(baseTextPosition)) {
|
1167 |
LOGDEB(("newpage: not in body\n", pos));
|
1157 |
LOGDEB(("newpage: not in body\n", pos));
|
1168 |
return;
|
1158 |
return;
|
1169 |
}
|
1159 |
}
|
1170 |
|
1160 |
|
1171 |
m_ts->doc.add_posting(m_ts->prefix + page_break_term, pos);
|
1161 |
m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
|
1172 |
if (pos == m_lastpagepos) {
|
1162 |
if (pos == m_lastpagepos) {
|
1173 |
m_pageincr++;
|
1163 |
m_pageincr++;
|
1174 |
LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
|
1164 |
LOGDEB2(("newpage: same pos, pageincr %d lastpagepos %d\n",
|
1175 |
m_pageincr, m_lastpagepos));
|
1165 |
m_pageincr, m_lastpagepos));
|
1176 |
} else {
|
1166 |
} else {
|
|
... |
|
... |
1349 |
continue;
|
1339 |
continue;
|
1350 |
}
|
1340 |
}
|
1351 |
LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
|
1341 |
LOGDEB0(("Db::add: field [%s] pfx [%s] inc %d: [%s]\n",
|
1352 |
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
1342 |
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
1353 |
meta_it->second.c_str()));
|
1343 |
meta_it->second.c_str()));
|
1354 |
splitter.setprefix(ftp->pfx);
|
1344 |
splitter.setTraits(*ftp);
|
1355 |
splitter.setwdfinc(ftp->wdfinc);
|
|
|
1356 |
if (!splitter.text_to_words(meta_it->second))
|
1345 |
if (!splitter.text_to_words(meta_it->second))
|
1357 |
LOGDEB(("Db::addOrUpdate: split failed for %s\n",
|
1346 |
LOGDEB(("Db::addOrUpdate: split failed for %s\n",
|
1358 |
meta_it->first.c_str()));
|
1347 |
meta_it->first.c_str()));
|
1359 |
}
|
1348 |
}
|
1360 |
}
|
1349 |
}
|
1361 |
splitter.setprefix(string());
|
1350 |
|
1362 |
splitter.setwdfinc(1);
|
1351 |
// Reset to no prefix and default params
|
|
|
1352 |
splitter.setTraits(FieldTraits());
|
1363 |
|
1353 |
|
1364 |
if (splitter.curpos < baseTextPosition)
|
1354 |
if (splitter.curpos < baseTextPosition)
|
1365 |
splitter.basepos = baseTextPosition;
|
1355 |
splitter.basepos = baseTextPosition;
|
1366 |
|
1356 |
|
1367 |
// Split and index body text
|
1357 |
// Split and index body text
|
|
... |
|
... |
1632 |
// Clear the previous terms for the field
|
1622 |
// Clear the previous terms for the field
|
1633 |
clearField(xdoc, ftp->pfx, ftp->wdfinc);
|
1623 |
clearField(xdoc, ftp->pfx, ftp->wdfinc);
|
1634 |
LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
|
1624 |
LOGDEB0(("Db::xattrOnly: field [%s] pfx [%s] inc %d: [%s]\n",
|
1635 |
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
1625 |
meta_it->first.c_str(), ftp->pfx.c_str(), ftp->wdfinc,
|
1636 |
meta_it->second.c_str()));
|
1626 |
meta_it->second.c_str()));
|
1637 |
splitter->setprefix(ftp->pfx);
|
1627 |
splitter->setTraits(*ftp);
|
1638 |
splitter->setwdfinc(ftp->wdfinc);
|
|
|
1639 |
if (!splitter->text_to_words(meta_it->second))
|
1628 |
if (!splitter->text_to_words(meta_it->second))
|
1640 |
LOGDEB(("Db::xattrOnly: split failed for %s\n",
|
1629 |
LOGDEB(("Db::xattrOnly: split failed for %s\n",
|
1641 |
meta_it->first.c_str()));
|
1630 |
meta_it->first.c_str()));
|
1642 |
}
|
1631 |
}
|
1643 |
xdoc.add_value(VALUE_SIG, doc.sig);
|
1632 |
xdoc.add_value(VALUE_SIG, doc.sig);
|