|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
692 |
|
692 |
|
693 |
bool Db::o_inPlaceReset;
|
693 |
bool Db::o_inPlaceReset;
|
694 |
|
694 |
|
695 |
Db::Db(const RclConfig *cfp)
|
695 |
Db::Db(const RclConfig *cfp)
|
696 |
: m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
|
696 |
: m_ndb(0), m_mode(Db::DbRO), m_curtxtsz(0), m_flushtxtsz(0),
|
697 |
m_occtxtsz(0), m_occFirstCheck(1),
|
697 |
m_occtxtsz(0), m_occFirstCheck(1), m_idxMetaStoredLen(150),
|
698 |
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
|
698 |
m_idxAbsTruncLen(250), m_synthAbsLen(250), m_synthAbsWordCtxLen(4),
|
699 |
m_flushMb(-1), m_maxFsOccupPc(0)
|
699 |
m_flushMb(-1), m_maxFsOccupPc(0)
|
700 |
{
|
700 |
{
|
701 |
m_config = new RclConfig(*cfp);
|
701 |
m_config = new RclConfig(*cfp);
|
702 |
if (start_of_field_term.empty()) {
|
702 |
if (start_of_field_term.empty()) {
|
|
... |
|
... |
711 |
|
711 |
|
712 |
m_ndb = new Native(this);
|
712 |
m_ndb = new Native(this);
|
713 |
if (m_config) {
|
713 |
if (m_config) {
|
714 |
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
714 |
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
715 |
m_config->getConfParam("idxflushmb", &m_flushMb);
|
715 |
m_config->getConfParam("idxflushmb", &m_flushMb);
|
|
|
716 |
m_config->getConfParam("idxmetastoredlen", &m_idxMetaStoredLen);
|
716 |
}
|
717 |
}
|
717 |
}
|
718 |
}
|
718 |
|
719 |
|
719 |
Db::~Db()
|
720 |
Db::~Db()
|
720 |
{
|
721 |
{
|
|
... |
|
... |
1467 |
}
|
1468 |
}
|
1468 |
|
1469 |
|
1469 |
if (!doc.ipath.empty())
|
1470 |
if (!doc.ipath.empty())
|
1470 |
RECORD_APPEND(record, Doc::keyipt, doc.ipath);
|
1471 |
RECORD_APPEND(record, Doc::keyipt, doc.ipath);
|
1471 |
|
1472 |
|
1472 |
doc.meta[Doc::keytt] =
|
1473 |
// Fields from the Meta array. Handle title specially because it has a
|
1473 |
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc);
|
1474 |
// different name inside the data record (history...)
|
1474 |
if (!doc.meta[Doc::keytt].empty())
|
1475 |
string& ttref = doc.meta[Doc::keytt];
|
|
|
1476 |
ttref = neutchars(truncate_to_word(ttref, m_idxMetaStoredLen), cstr_nc);
|
|
|
1477 |
if (!ttref.empty()) {
|
1475 |
RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]);
|
1478 |
RECORD_APPEND(record, cstr_caption, ttref);
|
1476 |
|
1479 |
ttref.clear();
|
1477 |
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
1480 |
}
|
1478 |
doc.meta[Doc::keykw] =
|
|
|
1479 |
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
|
|
|
1480 |
// No need to explicitly append the keywords, this will be done by
|
|
|
1481 |
// the "stored" loop
|
|
|
1482 |
|
1481 |
|
1483 |
// If abstract is empty, we make up one with the beginning of the
|
1482 |
// If abstract is empty, we make up one with the beginning of the
|
1484 |
// document. This is then not indexed, but part of the doc data so
|
1483 |
// document. This is then not indexed, but part of the doc data so
|
1485 |
// that we can return it to a query without having to decode the
|
1484 |
// that we can return it to a query without having to decode the
|
1486 |
// original file.
|
1485 |
// original file.
|
1487 |
bool syntabs = false;
|
1486 |
bool syntabs = false;
|
1488 |
// Note that the map accesses by operator[] create empty entries if they
|
1487 |
// Note that the map accesses by operator[] create empty entries if they
|
1489 |
// don't exist yet.
|
1488 |
// don't exist yet.
|
1490 |
trimstring(doc.meta[Doc::keyabs], " \t\r\n");
|
1489 |
string& absref = doc.meta[Doc::keyabs];
|
1491 |
if (doc.meta[Doc::keyabs].empty()) {
|
1490 |
trimstring(absref, " \t\r\n");
|
|
|
1491 |
if (absref.empty()) {
|
1492 |
syntabs = true;
|
1492 |
syntabs = true;
|
1493 |
if (!doc.text.empty())
|
1493 |
if (!doc.text.empty())
|
1494 |
doc.meta[Doc::keyabs] = cstr_syntAbs +
|
1494 |
absref = cstr_syntAbs +
|
1495 |
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc);
|
1495 |
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen),
|
|
|
1496 |
cstr_nc);
|
1496 |
} else {
|
1497 |
} else {
|
1497 |
doc.meta[Doc::keyabs] =
|
1498 |
absref = neutchars(truncate_to_word(absref, m_idxAbsTruncLen),
|
1498 |
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
1499 |
cstr_nc);
|
1499 |
cstr_nc);
|
|
|
1500 |
}
|
1500 |
}
|
|
|
1501 |
// Do the append here to avoid the different truncation done
|
|
|
1502 |
// in the regular "stored" loop
|
|
|
1503 |
if (!absref.empty()) {
|
|
|
1504 |
RECORD_APPEND(record, Doc::keyabs, absref);
|
|
|
1505 |
absref.clear();
|
|
|
1506 |
}
|
1501 |
|
1507 |
|
|
|
1508 |
// Append all regular "stored" meta fields
|
1502 |
const set<string>& stored = m_config->getStoredFields();
|
1509 |
const set<string>& stored = m_config->getStoredFields();
|
1503 |
for (set<string>::const_iterator it = stored.begin();
|
1510 |
for (set<string>::const_iterator it = stored.begin();
|
1504 |
it != stored.end(); it++) {
|
1511 |
it != stored.end(); it++) {
|
1505 |
string nm = m_config->fieldCanon(*it);
|
1512 |
string nm = m_config->fieldCanon(*it);
|
1506 |
if (!doc.meta[nm].empty()) {
|
1513 |
if (!doc.meta[nm].empty()) {
|
1507 |
string value =
|
1514 |
string value =
|
1508 |
neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc);
|
1515 |
neutchars(truncate_to_word(doc.meta[nm],
|
|
|
1516 |
m_idxMetaStoredLen), cstr_nc);
|
1509 |
RECORD_APPEND(record, nm, value);
|
1517 |
RECORD_APPEND(record, nm, value);
|
1510 |
}
|
1518 |
}
|
1511 |
}
|
1519 |
}
|
1512 |
|
1520 |
|
1513 |
// If empty pages (multiple break at same pos) were recorded, save
|
1521 |
// If empty pages (multiple break at same pos) were recorded, save
|
|
... |
|
... |
1609 |
const set<string>& stored = m_rcldb->m_config->getStoredFields();
|
1617 |
const set<string>& stored = m_rcldb->m_config->getStoredFields();
|
1610 |
for (set<string>::const_iterator it = stored.begin();
|
1618 |
for (set<string>::const_iterator it = stored.begin();
|
1611 |
it != stored.end(); it++) {
|
1619 |
it != stored.end(); it++) {
|
1612 |
string nm = m_rcldb->m_config->fieldCanon(*it);
|
1620 |
string nm = m_rcldb->m_config->fieldCanon(*it);
|
1613 |
if (doc.getmeta(nm, 0)) {
|
1621 |
if (doc.getmeta(nm, 0)) {
|
1614 |
string value =
|
1622 |
string value = neutchars(
|
1615 |
neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc);
|
1623 |
truncate_to_word(doc.meta[nm], m_rcldb->m_idxMetaStoredLen),
|
|
|
1624 |
cstr_nc);
|
1616 |
datadic.set(nm, value, "");
|
1625 |
datadic.set(nm, value, "");
|
1617 |
}
|
1626 |
}
|
1618 |
}
|
1627 |
}
|
1619 |
|
1628 |
|
1620 |
// Recreate the record. We want to do this with the local RECORD_APPEND
|
1629 |
// Recreate the record. We want to do this with the local RECORD_APPEND
|