recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [e5ec23] .. [16250f]

Switch to unified view


...
    , m_wqueue("DbUpd", 
           m_rcldb->m_config->getThrConf(RclConfig::ThrDbWrite).first),
      m_totalworkns(0LL), m_havewriteq(false)
#endif // IDX_THREADS
{ 
    LOGDEB1("Native::Native: me " << this << "\n");
}

// Destructor. When built with IDX_THREADS and the write queue was
// started (m_havewriteq set), the queue is told to terminate before the
// object goes away.
Db::Native::~Native()
{
    LOGDEB1("Native::~Native: me " << this << "\n");
#ifdef IDX_THREADS
    // No write queue was started: nothing to shut down.
    if (!m_havewriteq) {
        return;
    }
    // NOTE(review): setTerminateAndWait() presumably stops the worker and
    // waits for it to finish -- confirm against the work queue class.
    // A non-null result is only reported in the debug log.
    void *workerStatus = m_wqueue.setTerminateAndWait();
    if (workerStatus != nullptr) {
        LOGDEB1("Native::~Native: worker status " << workerStatus << "\n");
    }
#endif // IDX_THREADS
}

...
        return (void*)1;
    }
    bool status = false;
    switch (tsk->op) {
    case DbUpdTask::AddOrUpdate:
        LOGDEB("DbUpdWorker: got add/update task, ql " << qsz << "\n");
        status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm, 
                        tsk->doc, tsk->txtlen);
        break;
    case DbUpdTask::Delete:
        LOGDEB("DbUpdWorker: got delete task, ql " << qsz << "\n");
        status = ndbp->purgeFileWrite(false, tsk->udi, tsk->uniterm);
        break;
    case DbUpdTask::PurgeOrphans:
        LOGDEB("DbUpdWorker: got orphans purge task, ql " << qsz << "\n");
        status = ndbp->purgeFileWrite(true, tsk->udi, tsk->uniterm);
        break;
    default:
        LOGERR("DbUpdWorker: unknown op " << tsk->op << " !!\n");
        break;
    }
    if (!status) {
        LOGERR("DbUpdWorker: xxWrite failed\n");
        tqp->workerExit();
        delete tsk;
        return (void*)0;
    }
    delete tsk;
...
    m_havewriteq = false;
    const RclConfig *cnf = m_rcldb->m_config;
    int writeqlen = cnf->getThrConf(RclConfig::ThrDbWrite).first;
    int writethreads = cnf->getThrConf(RclConfig::ThrDbWrite).second;
    if (writethreads > 1) {
    LOGINFO("RclDb: write threads count was forced down to 1\n");
    writethreads = 1;
    }
    if (writeqlen >= 0 && writethreads > 0) {
    if (!m_wqueue.start(writethreads, DbUpdWorker, this)) {
        LOGERR("Db::Db: Worker start failed\n");
        return;
    }
    m_havewriteq = true;
    }
    LOGDEB("RclDb:: threads: haveWriteQ " << m_havewriteq << ", wqlen " <<
           writeqlen << " wqts " << writethreads << "\n");
}

#endif // IDX_THREADS

/* See comment in class declaration: return all subdocuments of a
 * document given by its unique id. 
*/
bool Db::Native::subDocs(const string &udi, int idxi, 
             vector<Xapian::docid>& docids) 
{
    LOGDEB2("subDocs: [" << uniterm << "]\n");
    string pterm = make_parentterm(udi);
    vector<Xapian::docid> candidates;
    XAPTRY(docids.clear();
           candidates.insert(candidates.begin(), xrdb.postlist_begin(pterm), 
                 xrdb.postlist_end(pterm)),
           xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGERR("Rcl::Db::subDocs: " << m_rcldb->m_reason << "\n");
        return false;
    } else {
    for (unsigned int i = 0; i < candidates.size(); i++) {
        if (whatDbIdx(candidates[i]) == (size_t)idxi) {
        docids.push_back(candidates[i]);
        }
    }
        LOGDEB0("Db::Native::subDocs: returning " << docids.size() << " ids\n");
        return true;
    }
}

bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)
...
    Xapian::TermIterator xit;
    XAPTRY(xit = xdoc.termlist_begin();
       xit.skip_to(wrap_prefix(udi_prefix)),
           xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
    LOGERR("xdocToUdi: xapian error: " << m_rcldb->m_reason << "\n");
    return false;
    }
    if (xit != xdoc.termlist_end()) {
    udi = *xit;
    if (!udi.empty()) {
...
// Drop a term from a document's term list once its wdf (within-document
// frequency) is 0. Xapian should probably do this by itself when
// removing a posting brings the frequency down to 0, but we have to do
// it ourselves.
//
// Returns false if positioning on the term failed or if the term is not
// in the document, true otherwise. A failure of the removal itself is
// only logged and does not change the return value.
bool Db::Native::clearDocTermIfWdf0(Xapian::Document& xdoc, const string& term)
{
    LOGDEB1("Db::clearDocTermIfWdf0: [" << term << "]\n");

    // Position an iterator on the term
    Xapian::TermIterator termit;
    XAPTRY(termit = xdoc.termlist_begin(); termit.skip_to(term),
           xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGERR("Db::clearDocTerm...: [" << term << "] skip failed: " <<
               m_rcldb->m_reason << "\n");
        return false;
    }

    // The iterator may be at the end or on a different term: we need
    // an exact match to go on.
    const bool atEnd = (termit == xdoc.termlist_end());
    if (atEnd || term.compare(*termit) != 0) {
        LOGDEB0("Db::clearDocTermIFWdf0: term [" << term <<
                "] not found. xit: [" <<
                (termit == xdoc.termlist_end() ? "EOL": *termit) << "]\n");
        return false;
    }

    // Term still in use: leave it alone
    if (termit.get_wdf() != 0) {
        return true;
    }

    // Frequency is 0: remove the term. An error here is not fatal.
    LOGDEB1("Db::clearDocTermIfWdf0: clearing [" << term << "]\n");
    XAPTRY(xdoc.remove_term(term), xwdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGDEB0("Db::clearDocTermIfWdf0: failed [" << term << "]: " <<
                m_rcldb->m_reason << "\n");
    }
    return true;
}

...
// prefix. We also remove the postings for the unprefixed terms (that
// is, we undo what we did when indexing).
bool Db::Native::clearField(Xapian::Document& xdoc, const string& pfx,
                Xapian::termcount wdfdec)
{
    LOGDEB1("Db::clearField: clearing prefix [" << pfx << "] for docid " <<
            xdoc.get_docid() << "\n");

    vector<DocPosting> eraselist;

    string wrapd = wrap_prefix(pfx);

...
        Xapian::TermIterator xit;
        xit = xdoc.termlist_begin();
        xit.skip_to(wrapd);
        while (xit != xdoc.termlist_end() && 
        !(*xit).compare(0, wrapd.size(), wrapd)) {
        LOGDEB1("Db::clearfield: erasing for [" << *xit << "]\n");
        Xapian::PositionIterator posit;
        for (posit = xit.positionlist_begin();
             posit != xit.positionlist_end(); posit++) {
            eraselist.push_back(DocPosting(*xit, *posit));
            eraselist.push_back(DocPosting(strip_prefix(*xit), *posit));
...
        continue;
    } XCATCHERROR(m_rcldb->m_reason);
    break;
    }
    if (!m_rcldb->m_reason.empty()) {
    LOGERR("Db::clearField: failed building erase list: " <<
               m_rcldb->m_reason << "\n");
    return false;
    }

    // Now remove the found positions, and the terms if the wdf is 0
    for (vector<DocPosting>::const_iterator it = eraselist.begin();
     it != eraselist.end(); it++) {
    LOGDEB1("Db::clearField: remove posting: [" << it->term << "] pos [" <<
                it->pos << "]\n");
    XAPTRY(xdoc.remove_posting(it->term, it->pos, wdfdec);, 
           xwdb,m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        // Note that this normally fails for non-prefixed XXST and
        // ND, don't make a fuss
      LOGDEB1("Db::clearFiedl: remove_posting failed for [" << it->term <<
                    "]," << it->pos << ": " << m_rcldb->m_reason << "\n");
    }
    clearDocTermIfWdf0(xdoc, it->term);
    }
    return true;
}

// Check if doc given by udi is indexed by term
bool Db::Native::hasTerm(const string& udi, int idxi, const string& term)
{
    LOGDEB2("Native::hasTerm: udi [" << udi << "] term [" << term << "]\n");
    Xapian::Document xdoc;
    if (getDoc(udi, idxi, xdoc)) {
    Xapian::TermIterator xit;
    XAPTRY(xit = xdoc.termlist_begin();
           xit.skip_to(term);,
           xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGERR("Rcl::Native::hasTerm: " << m_rcldb->m_reason << "\n");
        return false;
    }
    if (xit != xdoc.termlist_end() && !term.compare(*xit)) {
        return true;
    }
...
        xrdb.reopen();
            continue;
    } XCATCHERROR(m_rcldb->m_reason);
        break;
    }
    LOGERR("Db::Native::getDoc: Xapian error: " << m_rcldb->m_reason << "\n");
    return 0;
}

// Turn data record from db into document fields
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, 
                Doc &doc)
{
    LOGDEB2("Db::dbDataToRclDoc: data:\n" << data << "\n");
    ConfSimple parms(data);
    if (!parms.ok())
    return false;

    doc.xdocid = docid;
...
       if (pos != xrdb.positionlist_end(docid, page_break_term)) {
           return true;
       },
       xrdb, ermsg);
    if (!ermsg.empty()) {
    LOGERR("Db::Native::hasPages: xapian error: " << ermsg << "\n");
    }
    return false;
}

// Return the positions list for the page break term
...
    try {
    for (pos = xrdb.positionlist_begin(docid, qterm); 
         pos != xrdb.positionlist_end(docid, qterm); pos++) {
        int ipos = *pos;
        if (ipos < int(baseTextPosition)) {
        LOGDEB("getPagePositions: got page position " << ipos
                       << " not in body\n");
        // Not in text body. Strange...
        continue;
        }
        map<int, int>::iterator it = mbreaksmap.find(ipos);
        if (it != mbreaksmap.end()) {
        LOGDEB1("getPagePositions: found multibreak at " << ipos <<
                        " incr " << it->second << "\n");
        for (int i = 0 ; i < it->second; i++) 
            vpos.push_back(ipos);
        }
        vpos.push_back(ipos);
    } 
...
    // to do this after having prepared the document, but it needs to be in
    // the single-threaded section.
    if (m_rcldb->m_maxFsOccupPc > 0 && 
    (m_rcldb->m_occFirstCheck || 
     (m_rcldb->m_curtxtsz - m_rcldb->m_occtxtsz) / MB >= 1)) {
    LOGDEB("Db::add: checking file system usage\n");
    int pc;
    m_rcldb->m_occFirstCheck = 0;
    if (fsocc(m_rcldb->m_basedir, &pc) && pc >= m_rcldb->m_maxFsOccupPc) {
        LOGERR("Db::add: stop indexing: file system " << pc << " %" <<
                   " full > max " << m_rcldb->m_maxFsOccupPc << " %" << "\n");
        return false;
    }
    m_rcldb->m_occtxtsz = m_rcldb->m_curtxtsz;
    }

...
    if (did < m_rcldb->updated.size()) {
            // This is necessary because only the file-level docs are tested
            // by needUpdate(), so the subdocs existence flags are only set
            // here.
        m_rcldb->updated[did] = true;
        LOGINFO("Db::add: docid " << did << " updated [" << fnc << "]\n");
    } else {
        LOGINFO("Db::add: docid " << did << " added [" << fnc << "]\n");
    }
    } XCATCHERROR(ermsg);

    if (!ermsg.empty()) {
    LOGERR("Db::add: replace_document failed: " << ermsg << "\n");
    ermsg.erase();
    // FIXME: is this ever actually needed?
    try {
        xwdb.add_document(*newdocument_ptr);
      LOGDEB("Db::add: " << fnc <<
                   " added (failed re-seek for duplicate)\n");
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
        LOGERR("Db::add: add_document failed: " << ermsg << "\n");
        return false;
    }
    }

    // Test if we're over the flush threshold (limit memory usage):
...
    string sig;
    if (orphansOnly) {
        Xapian::Document doc = xwdb.get_document(*docid);
        sig = doc.get_value(VALUE_SIG);
        if (sig.empty()) {
        LOGINFO("purgeFileWrite: got empty sig\n");
        return false;
        }
    } else {
        LOGDEB("purgeFile: delete docid " << *docid << "\n");
        xwdb.delete_document(*docid);
    }
    vector<Xapian::docid> docids;
    subDocs(udi, 0, docids);
    LOGDEB("purgeFile: subdocs cnt " << docids.size() << "\n");
    for (vector<Xapian::docid>::iterator it = docids.begin();
         it != docids.end(); it++) {
        if (m_rcldb->m_flushMb > 0) {
        Xapian::termcount trms = xwdb.get_doclength(*it);
        m_rcldb->maybeflush(trms * 5);
...
        string subdocsig;
        if (orphansOnly) {
        Xapian::Document doc = xwdb.get_document(*it);
        subdocsig = doc.get_value(VALUE_SIG);
        if (subdocsig.empty()) {
            LOGINFO("purgeFileWrite: got empty sig for subdoc??\n");
            continue;
        }
        }
        
        if (!orphansOnly || sig != subdocsig) {
        LOGDEB("Db::purgeFile: delete subdoc " << *it << "\n");
        xwdb.delete_document(*it);
        }
    }
    return true;
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
    LOGERR("Db::purgeFileWrite: " << ermsg << "\n");
    }
    return false;
}


...

    if (m_ndb == 0 || m_config == 0) {
    m_reason = "Null configuration or Xapian Db";
    return false;
    }
    LOGDEB("Db::open: m_isopen " << m_ndb->m_isopen << " m_iswritable " <<
           m_ndb->m_iswritable << " mode " << mode << "\n");

    if (m_ndb->m_isopen) {
    // We used to return an error here but I see no reason to
    if (!close())
        return false;
...
        // subDocs(). This issue has been gone for a long time
                // (now: Xapian 1.2) and the separate objects seem to
                // trigger other Xapian issues, so the query db is now
                // a clone of the update one.
        m_ndb->xrdb = m_ndb->xwdb;
        LOGDEB("Db::open: lastdocid: " << m_ndb->xwdb.get_lastdocid() <<
                       "\n");
                LOGDEB2("Db::open: resetting updated\n");
                updated.resize(m_ndb->xwdb.get_lastdocid() + 1);
                for (unsigned int i = 0; i < updated.size(); i++)
                    updated[i] = false;
        }
        break;
...
        m_ndb->xrdb = Xapian::Database(dir);
        for (vector<string>::iterator it = m_extraDbs.begin();
         it != m_extraDbs.end(); it++) {
        if (error)
            *error = DbOpenExtraDb;
        LOGDEB("Db::Open: adding query db [" << &(*it) << "]\n");
                // An error here used to be non-fatal (1.13 and older)
                // but I can't see why
                m_ndb->xrdb.add_database(Xapian::Database(*it));
        }
        break;
...
    // truncated db
    if (mode != DbTrunc && m_ndb->xrdb.get_doccount() > 0) {
        string version = m_ndb->xrdb.get_metadata(cstr_RCL_IDX_VERSION_KEY);
        if (version.compare(cstr_RCL_IDX_VERSION)) {
        m_ndb->m_noversionwrite = true;
      LOGERR("Rcl::Db::open: file index [" << version <<
                       "], software [" << cstr_RCL_IDX_VERSION << "]\n");
        throw Xapian::DatabaseError("Recoll index version mismatch",
                        "", "");
        }
    }
    m_mode = mode;
...
        *error = DbOpenNoError;
    return true;
    } XCATCHERROR(ermsg);

    m_reason = ermsg;
    LOGERR("Db::open: exception while opening [" <<dir<< "]: " << ermsg << "\n");
    return false;
}

// Public close entry point. Xapian has no close call, so the native db
// object is deleted and recreated: this is handled by i_close(), which
// is called here with its 'final' argument set to false.
bool Db::close()
{
    LOGDEB1("Db::close()\n");
    const bool isFinal = false;
    return i_close(isFinal);
}
bool Db::i_close(bool final)
{
    if (m_ndb == 0)
    return false;
    LOGDEB("Db::i_close(" << final << "): m_isopen " << m_ndb->m_isopen <<
           " m_iswritable " << m_ndb->m_iswritable << "\n");
    if (m_ndb->m_isopen == false && !final) 
    return true;

    string ermsg;
    try {
...
        waitUpdIdle();
#endif
        if (!m_ndb->m_noversionwrite)
        m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, 
                     cstr_RCL_IDX_VERSION);
        LOGDEB("Rcl::Db:close: xapian will close. May take some time\n");
    }
    deleteZ(m_ndb);
    if (w)
        LOGDEB("Rcl::Db:close() xapian close done.\n");
    if (final) {
        return true;
    }
    m_ndb = new Native(this);
    if (m_ndb) {
        return true;
    }
    LOGERR("Rcl::Db::close(): cant recreate db object\n");
    return false;
    } XCATCHERROR(ermsg);
    LOGERR("Db:close: exception while deleting db: " << ermsg << "\n");
    return false;
}

// Reopen the db with a changed list of additional dbs
bool Db::adjustdbs()
{
    if (m_mode != DbRO) {
        LOGERR("Db::adjustdbs: mode not RO\n");
        return false;
    }
    if (m_ndb && m_ndb->m_isopen) {
    if (!close())
        return false;
...
        return -1;

    XAPTRY(res = m_ndb->xrdb.get_doccount(), m_ndb->xrdb, m_reason);

    if (!m_reason.empty()) {
        LOGERR("Db::docCnt: got error: " << m_reason << "\n");
        return -1;
    }
    return res;
}

...
        return -1;

    string term = _term;
    if (o_index_stripchars)
    if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
        LOGINFO("Db::termDocCnt: unac failed for [" << _term << "]\n");
        return 0;
    }

    if (m_stops.isStop(term)) {
    LOGDEB1("Db::termDocCnt [" << term << "] in stop list\n");
    return 0;
    }

    XAPTRY(res = m_ndb->xrdb.get_termfreq(term), m_ndb->xrdb, m_reason);

    if (!m_reason.empty()) {
        LOGERR("Db::termDocCnt: got error: " << m_reason << "\n");
        return -1;
    }
    return res;
}

bool Db::addQueryDb(const string &_dir) 
{
    string dir = _dir;
    LOGDEB0("Db::addQueryDb: ndb " << m_ndb << " iswritable " <<
            ((m_ndb)?m_ndb->m_iswritable:0) << " db [" << dir << "]\n");
    if (!m_ndb)
    return false;
    if (m_ndb->m_iswritable)
    return false;
    dir = path_canon(dir);
...
    return m_ndb->whatDbIdx(doc.xdocid);
}

size_t Db::Native::whatDbIdx(Xapian::docid id)
{
    LOGDEB1("Db::whatDbIdx: xdocid " << id << ", " <<
            m_rcldb->m_extraDbs.size() << " extraDbs\n");
    if (id == 0) 
    return (size_t)-1;
    if (m_rcldb->m_extraDbs.size() == 0)
    return 0;
    return (id - 1) % (m_rcldb->m_extraDbs.size() + 1);
...

bool Db::testDbDir(const string &dir, bool *stripped_p)
{
    string aerr;
    bool mstripped = true;
    LOGDEB("Db::testDbDir: [" << dir << "]\n");
    try {
    Xapian::Database db(dir);
    // If we have terms with a leading ':' it's an
    // unstripped index
    Xapian::TermIterator term = db.allterms_begin(":");
...
        mstripped = true;
    else
        mstripped = false;
    } XCATCHERROR(aerr);
    if (!aerr.empty()) {
    LOGERR("Db::Open: error while trying to open database from [" <<
               dir << "]: " << aerr << "\n");
    return false;
    }
    if (stripped_p) 
    *stripped_p = mstripped;

...
        // Index the possibly prefixed start term.
        doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
        ++basepos;
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
        LOGERR("Db: xapian add_posting error " << ermsg << "\n");
        goto out;
    }

    if (!TextSplitP::text_to_words(in)) {
        LOGDEB("TextSplitDb: TextSplit::text_to_words failed\n");
        goto out;
    }

    try {
        // Index the possibly prefixed end term.
        doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
                ft.wdfinc);
        ++basepos;
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
        LOGERR("Db: xapian add_posting error " << ermsg << "\n");
        goto out;
    }

    out:
    basepos += curpos + 100;
...
    if (term.empty())
        return true;
    string ermsg;
    try {
        // Index without prefix, using the field-specific weighting
        LOGDEB1("Emitting term at " << pos << " : [" << term << "]\n");
            if (!m_ts->ft.pfxonly)
                m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);

#ifdef TESTING_XAPIAN_SPELL
        if (Db::isSpellingCandidate(term, false)) {
...
        m_ts->doc.add_posting(m_ts->ft.pfx + term, pos, 
                                      m_ts->ft.wdfinc);
        }
        return true;
    } XCATCHERROR(ermsg);
    LOGERR("Db: xapian add_posting error " << ermsg << "\n");
    return false;
    }
    void newpage(int pos)
    {
    pos += m_ts->basepos;
    if (pos < int(baseTextPosition)) {
        LOGDEB("newpage: not in body: " << pos << "\n");
        return;
    }

    m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
    if (pos == m_lastpagepos) {
        m_pageincr++;
      LOGDEB2("newpage: same pos, pageincr " << m_pageincr <<
                    " lastpagepos " << m_lastpagepos << "\n");
    } else {
      LOGDEB2("newpage: pos change, pageincr " << m_pageincr <<
                    " lastpagepos " << m_lastpagepos << "\n");
        if (m_pageincr > 0) {
        // Remember the multiple page break at this position
        unsigned int relpos = m_lastpagepos - baseTextPosition;
        LOGDEB2("Remembering multiple page break. Relpos " << relpos <<
                        " cnt " << m_pageincr << "\n");
        m_pageincrvec.push_back(pair<int, int>(relpos, m_pageincr));
        }
        m_pageincr = 0;
    }
    m_lastpagepos = pos;
...

    // Record a still pending multiple page break, if any, before
    // passing the flush on to the base TermProc. The entry stored in
    // m_pageincrvec is (position relative to baseTextPosition, count).
    virtual bool flush()
    {
        if (m_pageincr > 0) {
            unsigned int pendingRelPos = m_lastpagepos - baseTextPosition;
            LOGDEB2("Remembering multiple page break. Position " << pendingRelPos <<
                    " cnt " << m_pageincr << "\n");
            m_pageincrvec.push_back(pair<int, int>(pendingRelPos, m_pageincr));
            // Stored: reset the counter
            m_pageincr = 0;
        }
        return TermProc::flush();
    }
...

// At the moment, we normally use the Xapian speller for Katakana and
// aspell for everything else
bool Db::getSpellingSuggestions(const string& word, vector<string>& suggs)
{
    LOGDEB("Db::getSpellingSuggestions:[" << word << "]\n");
    suggs.clear();
    if (nullptr == m_ndb) {
    return false;
    }

...
            m_aspell = new Aspell(m_config);
            if (m_aspell) {
                string reason;
                m_aspell->init(reason);
                if (!m_aspell->ok()) {
                    LOGDEB("Aspell speller init failed: " << reason << endl);
                    delete m_aspell;
                    m_aspell = 0;
                }
            }
        }
...
        if (!meta_it->second.empty()) {
        const FieldTraits *ftp;
        // We don't test for an empty prefix here. Some fields are part
        // of the internal conf with an empty prefix (ie: abstract).
        if (!fieldToTraits(meta_it->first, &ftp)) {
          LOGDEB0("Db::add: no prefix for field [" <<
                            meta_it->first << "], no indexing\n");
            continue;
        }
      LOGDEB0("Db::add: field [" << meta_it->first << "] pfx [" <<
                        ftp->pfx << "] inc " << ftp->wdfinc << ": [" <<
                        meta_it->second << "]\n");
                splitter.setTraits(*ftp);
        if (!splitter.text_to_words(meta_it->second)) {
            LOGDEB("Db::addOrUpdate: split failed for " <<
                           meta_it->first << "\n");
                }
        }
    }

        // Reset to no prefix and default params
        splitter.setTraits(FieldTraits());

    if (splitter.curpos < baseTextPosition)
        splitter.basepos = baseTextPosition;

    // Split and index body text
    LOGDEB2("Db::add: split body: [" << doc.text << "]\n");

#ifdef TEXTSPLIT_STATS
    splitter.resetStats();
#endif
    if (!splitter.text_to_words(doc.text))
        LOGDEB("Db::addOrUpdate: split failed for main text\n");

#ifdef TEXTSPLIT_STATS
    // Reject bad data. unrecognized base64 text is characterized by
    // high avg word length and high variation (because there are
    // word-splitters like +/ inside the data).
    TextSplit::Stats::Values v = splitter.getStats();
    // v.avglen > 15 && v.sigma > 12 
    if (v.count > 200 && (v.avglen > 10 && v.sigma / v.avglen > 0.8)) {
      LOGINFO("RclDb::addOrUpdate: rejecting doc for bad stats count " <<
                    v.count << " avglen " << v.avglen << " sigma " << v.sigma <<
                    " url [" << doc.url << "] ipath [" << doc.ipath <<
                    "] text " << doc.text << "\n");
            delete newdocument_ptr;
        return true;
    }
#endif

...
        MD5HexScan(*md5, digest);
        newdocument.add_value(VALUE_MD5, digest);
        newdocument.add_boolean_term(wrap_prefix("XM") + *md5);
    }

    LOGDEB0("Rcl::Db::add: new doc record:\n" << record << "\n");
    newdocument.set_data(record);
    }
#ifdef IDX_THREADS
    if (m_ndb->m_havewriteq) {
    DbUpdTask *tp = new DbUpdTask(DbUpdTask::AddOrUpdate, udi, uniterm, 
                      newdocument_ptr, doc.text.length());
    if (!m_ndb->m_wqueue.put(tp)) {
        LOGERR("Db::addOrUpdate:Cant queue task\n");
            delete newdocument_ptr;
        return false;
    } else {
        return true;
    }
...
}

bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi, 
                    Doc &doc, Xapian::Document& xdoc)
{
    LOGDEB0("Db::docToXdocXattrOnly\n");
#ifdef IDX_THREADS
    std::unique_lock<std::mutex> lock(m_mutex);
#endif

    // Read existing document and its data record
    if (getDoc(udi, 0, xdoc) == 0) {
    LOGERR("docToXdocXattrOnly: existing doc not found\n");
    return false;
    }
    string data;
    XAPTRY(data = xdoc.get_data(), xrdb, m_rcldb->m_reason);
    if (!m_rcldb->m_reason.empty()) {
        LOGERR("Db::xattrOnly: got error: " << m_rcldb->m_reason << "\n");
        return false;
    }

    // Clear the term lists for the incoming fields and index the new values
    map<string, string>::iterator meta_it;
    for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) {
    const FieldTraits *ftp;
    if (!m_rcldb->fieldToTraits(meta_it->first, &ftp) || ftp->pfx.empty()) {
      LOGDEB0("Db::xattrOnly: no prefix for field [" <<
                    meta_it->first << "], skipped\n");
        continue;
    }
    // Clear the previous terms for the field
    clearField(xdoc, ftp->pfx, ftp->wdfinc);
  LOGDEB0("Db::xattrOnly: field [" << meta_it->first << "] pfx [" <<
                ftp->pfx << "] inc " << ftp->wdfinc << ": [" <<
                meta_it->second << "]\n");
    splitter->setTraits(*ftp);
    if (!splitter->text_to_words(meta_it->second)) {
        LOGDEB("Db::xattrOnly: split failed for " << meta_it->first << "\n");
        }
    }
    xdoc.add_value(VALUE_SIG, doc.sig);

    // Parse current data record into a dict for ease of processing
    ConfSimple datadic(data);
    if (!datadic.ok()) {
    LOGERR("db::docToXdocXattrOnly: failed turning data rec to dict\n");
    return false;
    }

    // For each "stored" field, check if set in doc metadata and
    // update the value if it is
...
    string ermsg;
    try {
        m_ndb->xwdb.commit();
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
        LOGERR("Db::waitUpdIdle: flush() failed: " << ermsg << "\n");
    }
    m_ndb->m_totalworkns += chron.nanos();
  LOGINFO("Db::waitUpdIdle: total xapian work " <<
                lltodecstr(m_ndb->m_totalworkns/1000000) << " mS\n");
    }
}
#endif

// Flush when idxflushmbs is reached
// Account for 'moretext' more bytes of indexed text, and flush if the
// amount accumulated since the last flush reaches m_flushMb megabytes.
// Nothing is counted or flushed when m_flushMb is not positive.
// Returns the doFlush() status if a flush was performed, else true.
bool Db::maybeflush(int64_t moretext)
{
    // Threshold-based flushing is disabled
    if (m_flushMb <= 0) {
        return true;
    }

    m_curtxtsz += moretext;
    const auto unflushedMb = (m_curtxtsz - m_flushtxtsz) / MB;
    if (unflushedMb >= m_flushMb) {
        LOGDEB("Db::add/delete: txt size >= " << m_flushMb <<
               " Mb, flushing\n");
        return doFlush();
    }
    return true;
}

bool Db::doFlush()
{
    if (!m_ndb) {
    LOGERR("Db::doFLush: no ndb??\n");
    return false;
    }
    string ermsg;
    try {
    m_ndb->xwdb.commit();
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
    LOGERR("Db::doFlush: flush() failed: " << ermsg << "\n");
    return false;
    }
    m_flushtxtsz = m_curtxtsz;
    return true;
}
...
void Db::setExistingFlags(const string& udi, unsigned int docid)
{
    if (m_mode == DbRO)
        return;
    if (docid == (unsigned int)-1) {
        LOGERR("Db::setExistingFlags: called with bogus docid !!\n");
        return;
    }
#ifdef IDX_THREADS
    std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
#endif
...

void Db::i_setExistingFlags(const string& udi, unsigned int docid)
{
    // Set the up to date flag for the document and its subdocs
    if (docid >= updated.size()) {
        LOGERR("needUpdate: existing docid beyond updated.size(). Udi [" <<
               udi << "], docid " << docid << ", updated.size() " <<
               updated.size() << "\n");
        return;
    } else {
        updated[docid] = true;
    }

    // Set the existence flag for all the subdocs (if any)
    vector<Xapian::docid> docids;
    if (!m_ndb->subDocs(udi, 0, docids)) {
        LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
        return;
    }
    for (vector<Xapian::docid>::iterator it = docids.begin();
         it != docids.end(); it++) {
        if (*it < updated.size()) {
            LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n");
            updated[*it] = true;
        }
    }
}

...

    // Try to find the document indexed by the uniterm. 
    Xapian::PostingIterator docid;
    XAPTRY(docid = m_ndb->xrdb.postlist_begin(uniterm), m_ndb->xrdb, m_reason);
    if (!m_reason.empty()) {
        LOGERR("Db::needUpdate: xapian::postlist_begin failed: " <<
               m_reason << "\n");
        return false;
    }
    if (docid == m_ndb->xrdb.postlist_end(uniterm)) {
        // No document exists with this path: we do need update
        LOGDEB("Db::needUpdate:yes (new): [" << uniterm << "]\n");
        return true;
    }
    Xapian::Document xdoc;
    XAPTRY(xdoc = m_ndb->xrdb.get_document(*docid), m_ndb->xrdb, m_reason);
    if (!m_reason.empty()) {
        LOGERR("Db::needUpdate: get_document error: " << m_reason << "\n");
        return true;
    }

    if (docidp) {
        *docidp = *docid;
...

    // Retrieve old file/doc signature from value
    string osig;
    XAPTRY(osig = xdoc.get_value(VALUE_SIG), m_ndb->xrdb, m_reason);
    if (!m_reason.empty()) {
        LOGERR("Db::needUpdate: get_value error: " << m_reason << "\n");
        return true;
    }
    LOGDEB2("Db::needUpdate: oldsig [" << osig << "] new [" << sig << "]\n");

    if (osigp) {
        *osigp = osig;
    }

    // Compare new/old sig
    if (sig != osig) {
        LOGDEB("Db::needUpdate:yes: olsig [" << osig << "] new [" << sig <<
               "] [" << uniterm << "]\n");
        // Db is not up to date. Let's index the file
        return true;
    }

    // Up to date. Set the existence flags in the map for the doc and
    // its subdocs.
    LOGDEB("Db::needUpdate:no: [" << uniterm << "]\n");
    i_setExistingFlags(udi, *docid);
    return false;
}

// Return existing stem db languages
vector<string> Db::getStemLangs()
{
    LOGDEB("Db::getStemLang\n");
    vector<string> langs;
    if (m_ndb == 0 || m_ndb->m_isopen == false)
    return langs;
    StemDb db(m_ndb->xrdb);
    db.getMembers(langs);
...
/**
 * Delete stem db for given language
 */
bool Db::deleteStemDb(const string& lang)
{
    LOGDEB("Db::deleteStemDb(" << lang << ")\n");
    if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable)
    return false;
    XapWritableSynFamily db(m_ndb->xwdb, synFamStem);
    return db.deleteMember(lang);
}
...
 * with documents indexed by a single term (the stem), and with the list of
 * parent terms in the document data.
 */
// Create the stem dbs for the given languages: checks that the index is
// open for writing, then hands the work to createExpansionDbs().
// Returns false (and logs an error) if the index is not open or not
// writable, else the createExpansionDbs() result.
bool Db::createStemDbs(const vector<string>& langs)
{
    LOGDEB("Db::createStemDbs\n");
    const bool canWrite =
        m_ndb != 0 && m_ndb->m_isopen != false && m_ndb->m_iswritable;
    if (!canWrite) {
        LOGERR("createStemDb: db not open or not writable\n");
        return false;
    }

    return createExpansionDbs(m_ndb->xwdb, langs);
}
...
 * after a full file-system tree walk, else the file existence flags will 
 * be wrong.
 */
bool Db::purge()
{
    LOGDEB("Db::purge\n");
    if (m_ndb == 0)
    return false;
    LOGDEB("Db::purge: m_isopen " << m_ndb->m_isopen << " m_iswritable " <<
           m_ndb->m_iswritable << "\n");
    if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false) 
    return false;

#ifdef IDX_THREADS
    // If we manage our own write queue, make sure it's drained and closed
...
    // that any added document would go to the index. Kept here
    // because it doesn't really hurt.
    try {
    m_ndb->xwdb.commit();
    } catch (...) {
    LOGERR("Db::purge: 1st flush failed\n");

    }

    // Walk the document array and delete any xapian document whose
    // flag is not set (we did not see its source during indexing).
...
    if (!updated[docid]) {
        if ((purgecount+1) % 100 == 0) {
        try {
            CancelCheck::instance().checkCancel();
        } catch(CancelExcept) {
            LOGINFO("Db::purge: partially cancelled\n");
            break;
        }
        }

        try {
...
            // bad for performance.
            Xapian::termcount trms = m_ndb->xwdb.get_doclength(docid);
            maybeflush(trms * 5);
        }
        m_ndb->xwdb.delete_document(docid);
        LOGDEB("Db::purge: deleted document #" << docid << "\n");
        } catch (const Xapian::DocNotFoundError &) {
        LOGDEB0("Db::purge: document #" << docid << " not found\n");
        } catch (const Xapian::Error &e) {
        LOGERR("Db::purge: document #" << docid << ": " <<
                       e.get_msg() << "\n");
        } catch (...) {
        LOGERR("Db::purge: document #" << docid << ": unknown error\n");
        }
        purgecount++;
    }
    }

    try {
    m_ndb->xwdb.commit();
    } catch (...) {
    LOGERR("Db::purge: 2nd flush failed\n");
    }
    return true;
}

// Test for doc existence.
...
        } else {
        return true;
    }
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
    LOGERR("Db::docExists(" << uniterm << ") " << ermsg << "\n");
    }
    return false;
}

/* Delete document(s) for given unique identifier (doc and descendants) */
bool Db::purgeFile(const string &udi, bool *existed)
{
    LOGDEB("Db:purgeFile: [" << udi << "]\n");
    if (m_ndb == 0 || !m_ndb->m_iswritable)
    return false;

    string uniterm = make_uniterm(udi);
    bool exists = docExists(uniterm);
...
#ifdef IDX_THREADS
    if (m_ndb->m_havewriteq) {
    DbUpdTask *tp = new DbUpdTask(DbUpdTask::Delete, udi, uniterm, 
                      0, (size_t)-1);
    if (!m_ndb->m_wqueue.put(tp)) {
        LOGERR("Db::purgeFile:Cant queue task\n");
        return false;
    } else {
        return true;
    }
    }
...
/* Delete subdocs with an out of date sig. We do this to purge
   obsolete subdocs during a partial update where no general purge
   will be done */
bool Db::purgeOrphans(const string &udi)
{
    LOGDEB("Db:purgeOrphans: [" << udi << "]\n");
    if (m_ndb == 0 || !m_ndb->m_iswritable)
    return false;

    string uniterm = make_uniterm(udi);

#ifdef IDX_THREADS
    if (m_ndb->m_havewriteq) {
    DbUpdTask *tp = new DbUpdTask(DbUpdTask::PurgeOrphans, udi, uniterm, 
                      0, (size_t)-1);
    if (!m_ndb->m_wqueue.put(tp)) {
        LOGERR("Db::purgeFile:Cant queue task\n");
        return false;
    } else {
        return true;
    }
    }
...
// by the GUI history feature and by open parent/getenclosing
// ! The return value is always true except for fatal errors. Document
//  existence should be tested by looking at doc.pc
bool Db::getDoc(const string &udi, const Doc& idxdoc, Doc &doc)
{
    LOGDEB("Db:getDoc: [" << udi << "]\n");
    // No native database object: report failure to the caller.
    if (m_ndb == 0)
    return false;

    // Initialize what we can in any case. If this is history, caller
    // will make partial display in case of error
...
    // Document found in history no longer in the
    // database.  We return true (because there might be
    // other ok docs further) but indicate the error with
    // pc = -1
    doc.pc = -1;
    LOGINFO("Db:getDoc: no such doc in index: [" << udi << "]\n");
    return true;
    }
}

bool Db::hasSubDocs(const Doc &idoc)
{
    if (m_ndb == 0)
    return false;
    string inudi;
    if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
    LOGERR("Db::hasSubDocs: no input udi or empty\n");
    return false;
    }
    LOGDEB1("Db::hasSubDocs: idxi " << idoc.idxi << " inudi [" <<inudi << "]\n");

    // Not sure why we perform both the subDocs() call and the test on
    // has_children. The former will return docs if the input is a
    // file-level document, but the latter should be true both in this
    // case and if the input is already a subdoc, so the first test
    // should be redundant. Does not hurt much in any case, to be
    // checked one day.
    vector<Xapian::docid> docids;
    if (!m_ndb->subDocs(inudi, idoc.idxi, docids)) {
    LOGDEB("Db::hasSubDocs: lower level subdocs failed\n");
    return false;
    }
    if (!docids.empty())
    return true;

...
    if (m_ndb == 0)
    return false;

    string inudi;
    if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
    LOGERR("Db::getSubDocs: no input udi or empty\n");
    return false;
    }

    string rootudi;
    string ipath = idoc.ipath;
    LOGDEB0("Db::getSubDocs: idxi " << idoc.idxi << " inudi [" << inudi <<
            "] ipath [" << ipath << "]\n");
    if (ipath.empty()) {
    // File-level doc. Use it as root
    rootudi = inudi;
    } else {
    // See if we have a parent term
    Xapian::Document xdoc;
    if (!m_ndb->getDoc(inudi, idoc.idxi, xdoc)) {
        LOGERR("Db::getSubDocs: can't get Xapian document\n");
        return false;
    }
    Xapian::TermIterator xit;
    XAPTRY(xit = xdoc.termlist_begin();
           xit.skip_to(wrap_prefix(parent_prefix)),
           m_ndb->xrdb, m_reason);
    if (!m_reason.empty()) {
        LOGERR("Db::getSubDocs: xapian error: " << m_reason << "\n");
        return false;
    }
    if (xit == xdoc.termlist_end()) {
        LOGERR("Db::getSubDocs: parent term not found\n");
        return false;
    }
    rootudi = strip_prefix(*xit);
    }

    LOGDEB("Db::getSubDocs: root: [" << rootudi << "]\n");

    // Retrieve all subdoc xapian ids for the root
    vector<Xapian::docid> docids;
    if (!m_ndb->subDocs(rootudi, idoc.idxi, docids)) {
    LOGDEB("Db::getSubDocs: lower level subdocs failed\n");
    return false;
    }

    // Retrieve doc, filter, and build output list
    for (int tries = 0; tries < 2; tries++) {
...
        Doc doc;
        doc.meta[Doc::keyudi] = docudi;
        doc.meta[Doc::keyrr] = "100%";
        doc.pc = 100;
        if (!m_ndb->dbDataToRclDoc(*it, data, doc)) {
            LOGERR("Db::getSubDocs: doc conversion error\n");
            return false;
        }
                if (ipath.empty() ||
                    FileInterner::ipathContains(ipath, doc.ipath)) {
                    subdocs.push_back(doc);
...
            continue;
    } XCATCHERROR(m_reason);
        break;
    }

    LOGERR("Db::getSubDocs: Xapian error: " << m_reason << "\n");
    return false;
}

} // End namespace Rcl


	a/src/rcldb/rcldb.cpp		b/src/rcldb/rcldb.cpp
	...		...
152	, m_wqueue("DbUpd",	152	, m_wqueue("DbUpd",
153	m_rcldb->m_config->getThrConf(RclConfig::ThrDbWrite).first),	153	m_rcldb->m_config->getThrConf(RclConfig::ThrDbWrite).first),
154	m_totalworkns(0LL), m_havewriteq(false)	154	m_totalworkns(0LL), m_havewriteq(false)
155	#endif // IDX_THREADS	155	#endif // IDX_THREADS
156	{	156	{
157	LOGDEB1("Native::Native: me " << (this) << "\n" );	157	LOGDEB1("Native::Native: me " << this << "\n");
158	}	158	}
159		159
160	Db::Native::~Native()	160	Db::Native::~Native()
161	{	161	{
162	LOGDEB1("Native::~Native: me " << (this) << "\n" );	162	LOGDEB1("Native::~Native: me " << this << "\n");
163	#ifdef IDX_THREADS	163	#ifdef IDX_THREADS
164	if (m_havewriteq) {	164	if (m_havewriteq) {
165	void *status = m_wqueue.setTerminateAndWait();	165	void *status = m_wqueue.setTerminateAndWait();
166	if (status) {	166	if (status) {
167	LOGDEB1("Native::~Native: worker status " << status << "\n");	167	LOGDEB1("Native::~Native: worker status " << status << "\n");
168	}	168	}
169	}	169	}
170	#endif // IDX_THREADS	170	#endif // IDX_THREADS
171	}	171	}
172		172
	...		...
185	return (void*)1;	185	return (void*)1;
186	}	186	}
187	bool status = false;	187	bool status = false;
188	switch (tsk->op) {	188	switch (tsk->op) {
189	case DbUpdTask::AddOrUpdate:	189	case DbUpdTask::AddOrUpdate:
190	LOGDEB("DbUpdWorker: got add/update task, ql " << (int(qsz)) << "\n" );	190	LOGDEB("DbUpdWorker: got add/update task, ql " << qsz << "\n");
191	status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm,	191	status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm,
192	tsk->doc, tsk->txtlen);	192	tsk->doc, tsk->txtlen);
193	break;	193	break;
194	case DbUpdTask::Delete:	194	case DbUpdTask::Delete:
195	LOGDEB("DbUpdWorker: got delete task, ql " << (int(qsz)) << "\n" );	195	LOGDEB("DbUpdWorker: got delete task, ql " << qsz << "\n");
196	status = ndbp->purgeFileWrite(false, tsk->udi, tsk->uniterm);	196	status = ndbp->purgeFileWrite(false, tsk->udi, tsk->uniterm);
197	break;	197	break;
198	case DbUpdTask::PurgeOrphans:	198	case DbUpdTask::PurgeOrphans:
199	LOGDEB("DbUpdWorker: got orphans purge task, ql " << (int(qsz)) << "\n" );	199	LOGDEB("DbUpdWorker: got orphans purge task, ql " << qsz << "\n");
200	status = ndbp->purgeFileWrite(true, tsk->udi, tsk->uniterm);	200	status = ndbp->purgeFileWrite(true, tsk->udi, tsk->uniterm);
201	break;	201	break;
202	default:	202	default:
203	LOGERR("DbUpdWorker: unknown op " << (tsk->op) << " !!\n" );	203	LOGERR("DbUpdWorker: unknown op " << tsk->op << " !!\n");
204	break;	204	break;
205	}	205	}
206	if (!status) {	206	if (!status) {
207	LOGERR("DbUpdWorker: xxWrite failed\n" );	207	LOGERR("DbUpdWorker: xxWrite failed\n");
208	tqp->workerExit();	208	tqp->workerExit();
209	delete tsk;	209	delete tsk;
210	return (void*)0;	210	return (void*)0;
211	}	211	}
212	delete tsk;	212	delete tsk;
	...		...
218	m_havewriteq = false;	218	m_havewriteq = false;
219	const RclConfig *cnf = m_rcldb->m_config;	219	const RclConfig *cnf = m_rcldb->m_config;
220	int writeqlen = cnf->getThrConf(RclConfig::ThrDbWrite).first;	220	int writeqlen = cnf->getThrConf(RclConfig::ThrDbWrite).first;
221	int writethreads = cnf->getThrConf(RclConfig::ThrDbWrite).second;	221	int writethreads = cnf->getThrConf(RclConfig::ThrDbWrite).second;
222	if (writethreads > 1) {	222	if (writethreads > 1) {
223	LOGINFO("RclDb: write threads count was forced down to 1\n" );	223	LOGINFO("RclDb: write threads count was forced down to 1\n");
224	writethreads = 1;	224	writethreads = 1;
225	}	225	}
226	if (writeqlen >= 0 && writethreads > 0) {	226	if (writeqlen >= 0 && writethreads > 0) {
227	if (!m_wqueue.start(writethreads, DbUpdWorker, this)) {	227	if (!m_wqueue.start(writethreads, DbUpdWorker, this)) {
228	LOGERR("Db::Db: Worker start failed\n" );	228	LOGERR("Db::Db: Worker start failed\n");
229	return;	229	return;
230	}	230	}
231	m_havewriteq = true;	231	m_havewriteq = true;
232	}	232	}
233	LOGDEB("RclDb:: threads: haveWriteQ " << (m_havewriteq) << ", wqlen " << (writeqlen) << " wqts " << (writethreads) << "\n" );	233	LOGDEB("RclDb:: threads: haveWriteQ " << m_havewriteq << ", wqlen " <<
		234	writeqlen << " wqts " << writethreads << "\n");
234	}	235	}
235		236
236	#endif // IDX_THREADS	237	#endif // IDX_THREADS
237		238
238	/* See comment in class declaration: return all subdocuments of a	239	/* See comment in class declaration: return all subdocuments of a
239	* document given by its unique id.	240	* document given by its unique id.
240	*/	241	*/
241	bool Db::Native::subDocs(const string &udi, int idxi,	242	bool Db::Native::subDocs(const string &udi, int idxi,
242	vector<Xapian::docid>& docids)	243	vector<Xapian::docid>& docids)
243	{	244	{
244	LOGDEB2("subDocs: [" << (uniterm) << "]\n" );	245	LOGDEB2("subDocs: [" << uniterm << "]\n");
245	string pterm = make_parentterm(udi);	246	string pterm = make_parentterm(udi);
246	vector<Xapian::docid> candidates;	247	vector<Xapian::docid> candidates;
247	XAPTRY(docids.clear();	248	XAPTRY(docids.clear();
248	candidates.insert(candidates.begin(), xrdb.postlist_begin(pterm),	249	candidates.insert(candidates.begin(), xrdb.postlist_begin(pterm),
249	xrdb.postlist_end(pterm)),	250	xrdb.postlist_end(pterm)),
250	xrdb, m_rcldb->m_reason);	251	xrdb, m_rcldb->m_reason);
251	if (!m_rcldb->m_reason.empty()) {	252	if (!m_rcldb->m_reason.empty()) {
252	LOGERR("Rcl::Db::subDocs: " << (m_rcldb->m_reason) << "\n" );	253	LOGERR("Rcl::Db::subDocs: " << m_rcldb->m_reason << "\n");
253	return false;	254	return false;
254	} else {	255	} else {
255	for (unsigned int i = 0; i < candidates.size(); i++) {	256	for (unsigned int i = 0; i < candidates.size(); i++) {
256	if (whatDbIdx(candidates[i]) == (size_t)idxi) {	257	if (whatDbIdx(candidates[i]) == (size_t)idxi) {
257	docids.push_back(candidates[i]);	258	docids.push_back(candidates[i]);
258	}	259	}
259	}	260	}
260	LOGDEB0("Db::Native::subDocs: returning " << (docids.size()) << " ids\n" );	261	LOGDEB0("Db::Native::subDocs: returning " << docids.size() << " ids\n");
261	return true;	262	return true;
262	}	263	}
263	}	264	}
264		265
265	bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)	266	bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)
	...		...
267	Xapian::TermIterator xit;	268	Xapian::TermIterator xit;
268	XAPTRY(xit = xdoc.termlist_begin();	269	XAPTRY(xit = xdoc.termlist_begin();
269	xit.skip_to(wrap_prefix(udi_prefix)),	270	xit.skip_to(wrap_prefix(udi_prefix)),
270	xrdb, m_rcldb->m_reason);	271	xrdb, m_rcldb->m_reason);
271	if (!m_rcldb->m_reason.empty()) {	272	if (!m_rcldb->m_reason.empty()) {
272	LOGERR("xdocToUdi: xapian error: " << (m_rcldb->m_reason) << "\n" );	273	LOGERR("xdocToUdi: xapian error: " << m_rcldb->m_reason << "\n");
273	return false;	274	return false;
274	}	275	}
275	if (xit != xdoc.termlist_end()) {	276	if (xit != xdoc.termlist_end()) {
276	udi = *xit;	277	udi = *xit;
277	if (!udi.empty()) {	278	if (!udi.empty()) {
	...		...
285	// Clear term from document if its frequency is 0. This should	286	// Clear term from document if its frequency is 0. This should
286	// probably be done by Xapian when the freq goes to 0 when removing a	287	// probably be done by Xapian when the freq goes to 0 when removing a
287	// posting, but we have to do it ourselves	288	// posting, but we have to do it ourselves
288	bool Db::Native::clearDocTermIfWdf0(Xapian::Document& xdoc, const string& term)	289	bool Db::Native::clearDocTermIfWdf0(Xapian::Document& xdoc, const string& term)
289	{	290	{
290	LOGDEB1("Db::clearDocTermIfWdf0: [" << (term) << "]\n" );	291	LOGDEB1("Db::clearDocTermIfWdf0: [" << term << "]\n");
291		292
292	// Find the term	293	// Find the term
293	Xapian::TermIterator xit;	294	Xapian::TermIterator xit;
294	XAPTRY(xit = xdoc.termlist_begin(); xit.skip_to(term);,	295	XAPTRY(xit = xdoc.termlist_begin(); xit.skip_to(term);,
295	xrdb, m_rcldb->m_reason);	296	xrdb, m_rcldb->m_reason);
296	if (!m_rcldb->m_reason.empty()) {	297	if (!m_rcldb->m_reason.empty()) {
297	LOGERR("Db::clearDocTerm...: [" << (term) << "] skip failed: " << (m_rcldb->m_reason) << "\n" );	298	LOGERR("Db::clearDocTerm...: [" << term << "] skip failed: " <<
		299	m_rcldb->m_reason << "\n");
298	return false;	300	return false;
299	}	301	}
300	if (xit == xdoc.termlist_end() \|\| term.compare(*xit)) {	302	if (xit == xdoc.termlist_end() \|\| term.compare(*xit)) {
301	LOGDEB0("Db::clearDocTermIFWdf0: term [" << (term) << "] not found. xit: [" << (xit == xdoc.termlist_end() ? "EOL":(*xit)) << "]\n" );	303	LOGDEB0("Db::clearDocTermIFWdf0: term [" << term <<
		304	"] not found. xit: [" <<
		305	(xit == xdoc.termlist_end() ? "EOL": *xit) << "]\n");
302	return false;	306	return false;
303	}	307	}
304		308
305	// Clear the term if its frequency is 0	309	// Clear the term if its frequency is 0
306	if (xit.get_wdf() == 0) {	310	if (xit.get_wdf() == 0) {
307	LOGDEB1("Db::clearDocTermIfWdf0: clearing [" << (term) << "]\n" );	311	LOGDEB1("Db::clearDocTermIfWdf0: clearing [" << term << "]\n");
308	XAPTRY(xdoc.remove_term(term), xwdb, m_rcldb->m_reason);	312	XAPTRY(xdoc.remove_term(term), xwdb, m_rcldb->m_reason);
309	if (!m_rcldb->m_reason.empty()) {	313	if (!m_rcldb->m_reason.empty()) {
310	LOGDEB0("Db::clearDocTermIfWdf0: failed [" << (term) << "]: " << (m_rcldb->m_reason) << "\n" );	314	LOGDEB0("Db::clearDocTermIfWdf0: failed [" << term << "]: " <<
		315	m_rcldb->m_reason << "\n");
311	}	316	}
312	}	317	}
313	return true;	318	return true;
314	}	319	}
315		320
	...		...
326	// prefix. We also remove the postings for the unprefixed terms (that	331	// prefix. We also remove the postings for the unprefixed terms (that
327	// is, we undo what we did when indexing).	332	// is, we undo what we did when indexing).
328	bool Db::Native::clearField(Xapian::Document& xdoc, const string& pfx,	333	bool Db::Native::clearField(Xapian::Document& xdoc, const string& pfx,
329	Xapian::termcount wdfdec)	334	Xapian::termcount wdfdec)
330	{	335	{
331	LOGDEB1("Db::clearField: clearing prefix [" << (pfx) << "] for docid " << (unsigned(xdoc.get_docid())) << "\n" );	336	LOGDEB1("Db::clearField: clearing prefix [" << pfx << "] for docid " <<
		337	xdoc.get_docid() << "\n");
332		338
333	vector<DocPosting> eraselist;	339	vector<DocPosting> eraselist;
334		340
335	string wrapd = wrap_prefix(pfx);	341	string wrapd = wrap_prefix(pfx);
336		342
	...		...
340	Xapian::TermIterator xit;	346	Xapian::TermIterator xit;
341	xit = xdoc.termlist_begin();	347	xit = xdoc.termlist_begin();
342	xit.skip_to(wrapd);	348	xit.skip_to(wrapd);
343	while (xit != xdoc.termlist_end() &&	349	while (xit != xdoc.termlist_end() &&
344	!(*xit).compare(0, wrapd.size(), wrapd)) {	350	!(*xit).compare(0, wrapd.size(), wrapd)) {
345	LOGDEB1("Db::clearfield: erasing for [" << ((*xit)) << "]\n" );	351	LOGDEB1("Db::clearfield: erasing for [" << *xit << "]\n");
346	Xapian::PositionIterator posit;	352	Xapian::PositionIterator posit;
347	for (posit = xit.positionlist_begin();	353	for (posit = xit.positionlist_begin();
348	posit != xit.positionlist_end(); posit++) {	354	posit != xit.positionlist_end(); posit++) {
349	eraselist.push_back(DocPosting(xit, posit));	355	eraselist.push_back(DocPosting(xit, posit));
350	eraselist.push_back(DocPosting(strip_prefix(xit), posit));	356	eraselist.push_back(DocPosting(strip_prefix(xit), posit));
	...		...
357	continue;	363	continue;
358	} XCATCHERROR(m_rcldb->m_reason);	364	} XCATCHERROR(m_rcldb->m_reason);
359	break;	365	break;
360	}	366	}
361	if (!m_rcldb->m_reason.empty()) {	367	if (!m_rcldb->m_reason.empty()) {
362	LOGERR("Db::clearField: failed building erase list: " << (m_rcldb->m_reason) << "\n" );	368	LOGERR("Db::clearField: failed building erase list: " <<
		369	m_rcldb->m_reason << "\n");
363	return false;	370	return false;
364	}	371	}
365		372
366	// Now remove the found positions, and the terms if the wdf is 0	373	// Now remove the found positions, and the terms if the wdf is 0
367	for (vector<DocPosting>::const_iterator it = eraselist.begin();	374	for (vector<DocPosting>::const_iterator it = eraselist.begin();
368	it != eraselist.end(); it++) {	375	it != eraselist.end(); it++) {
369	LOGDEB1("Db::clearField: remove posting: [" << (it->term) << "] pos [" << (int(it->pos)) << "]\n" );	376	LOGDEB1("Db::clearField: remove posting: [" << it->term << "] pos [" <<
		377	it->pos << "]\n");
370	XAPTRY(xdoc.remove_posting(it->term, it->pos, wdfdec);,	378	XAPTRY(xdoc.remove_posting(it->term, it->pos, wdfdec);,
371	xwdb,m_rcldb->m_reason);	379	xwdb,m_rcldb->m_reason);
372	if (!m_rcldb->m_reason.empty()) {	380	if (!m_rcldb->m_reason.empty()) {
373	// Not that this normally fails for non-prefixed XXST and	381	// Not that this normally fails for non-prefixed XXST and
374	// ND, don't make a fuss	382	// ND, don't make a fuss
375	LOGDEB1("Db::clearFiedl: remove_posting failed for [" << (it->term) << "]," << (int(it->pos)) << ": " << (m_rcldb->m_reason) << "\n" );	383	LOGDEB1("Db::clearFiedl: remove_posting failed for [" << it->term <<
		384	"]," << it->pos << ": " << m_rcldb->m_reason << "\n");
376	}	385	}
377	clearDocTermIfWdf0(xdoc, it->term);	386	clearDocTermIfWdf0(xdoc, it->term);
378	}	387	}
379	return true;	388	return true;
380	}	389	}
381		390
382	// Check if doc given by udi is indexed by term	391	// Check if doc given by udi is indexed by term
383	bool Db::Native::hasTerm(const string& udi, int idxi, const string& term)	392	bool Db::Native::hasTerm(const string& udi, int idxi, const string& term)
384	{	393	{
385	LOGDEB2("Native::hasTerm: udi [" << (udi) << "] term [" << (term) << "]\n" );	394	LOGDEB2("Native::hasTerm: udi [" << udi << "] term [" << term << "]\n");
386	Xapian::Document xdoc;	395	Xapian::Document xdoc;
387	if (getDoc(udi, idxi, xdoc)) {	396	if (getDoc(udi, idxi, xdoc)) {
388	Xapian::TermIterator xit;	397	Xapian::TermIterator xit;
389	XAPTRY(xit = xdoc.termlist_begin();	398	XAPTRY(xit = xdoc.termlist_begin();
390	xit.skip_to(term);,	399	xit.skip_to(term);,
391	xrdb, m_rcldb->m_reason);	400	xrdb, m_rcldb->m_reason);
392	if (!m_rcldb->m_reason.empty()) {	401	if (!m_rcldb->m_reason.empty()) {
393	LOGERR("Rcl::Native::hasTerm: " << (m_rcldb->m_reason) << "\n" );	402	LOGERR("Rcl::Native::hasTerm: " << m_rcldb->m_reason << "\n");
394	return false;	403	return false;
395	}	404	}
396	if (xit != xdoc.termlist_end() && !term.compare(*xit)) {	405	if (xit != xdoc.termlist_end() && !term.compare(*xit)) {
397	return true;	406	return true;
398	}	407	}
	...		...
422	xrdb.reopen();	431	xrdb.reopen();
423	continue;	432	continue;
424	} XCATCHERROR(m_rcldb->m_reason);	433	} XCATCHERROR(m_rcldb->m_reason);
425	break;	434	break;
426	}	435	}
427	LOGERR("Db::Native::getDoc: Xapian error: " << (m_rcldb->m_reason) << "\n" );	436	LOGERR("Db::Native::getDoc: Xapian error: " << m_rcldb->m_reason << "\n");
428	return 0;	437	return 0;
429	}	438	}
430		439
431	// Turn data record from db into document fields	440	// Turn data record from db into document fields
432	bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,	441	bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
433	Doc &doc)	442	Doc &doc)
434	{	443	{
435	LOGDEB2("Db::dbDataToRclDoc: data:\n" << (data) << "\n" );	444	LOGDEB2("Db::dbDataToRclDoc: data:\n" << data << "\n");
436	ConfSimple parms(data);	445	ConfSimple parms(data);
437	if (!parms.ok())	446	if (!parms.ok())
438	return false;	447	return false;
439		448
440	doc.xdocid = docid;	449	doc.xdocid = docid;
	...		...
501	if (pos != xrdb.positionlist_end(docid, page_break_term)) {	510	if (pos != xrdb.positionlist_end(docid, page_break_term)) {
502	return true;	511	return true;
503	},	512	},
504	xrdb, ermsg);	513	xrdb, ermsg);
505	if (!ermsg.empty()) {	514	if (!ermsg.empty()) {
506	LOGERR("Db::Native::hasPages: xapian error: " << (ermsg) << "\n" );	515	LOGERR("Db::Native::hasPages: xapian error: " << ermsg << "\n");
507	}	516	}
508	return false;	517	return false;
509	}	518	}
510		519
511	// Return the positions list for the page break term	520	// Return the positions list for the page break term
	...		...
538	try {	547	try {
539	for (pos = xrdb.positionlist_begin(docid, qterm);	548	for (pos = xrdb.positionlist_begin(docid, qterm);
540	pos != xrdb.positionlist_end(docid, qterm); pos++) {	549	pos != xrdb.positionlist_end(docid, qterm); pos++) {
541	int ipos = *pos;	550	int ipos = *pos;
542	if (ipos < int(baseTextPosition)) {	551	if (ipos < int(baseTextPosition)) {
543	LOGDEB("getPagePositions: got page position " << (ipos) << " not in body\n" );	552	LOGDEB("getPagePositions: got page position " << ipos
		553	<< " not in body\n");
544	// Not in text body. Strange...	554	// Not in text body. Strange...
545	continue;	555	continue;
546	}	556	}
547	map<int, int>::iterator it = mbreaksmap.find(ipos);	557	map<int, int>::iterator it = mbreaksmap.find(ipos);
548	if (it != mbreaksmap.end()) {	558	if (it != mbreaksmap.end()) {
549	LOGDEB1("getPagePositions: found multibreak at " << (ipos) << " incr " << (it->second) << "\n" );	559	LOGDEB1("getPagePositions: found multibreak at " << ipos <<
		560	" incr " << it->second << "\n");
550	for (int i = 0 ; i < it->second; i++)	561	for (int i = 0 ; i < it->second; i++)
551	vpos.push_back(ipos);	562	vpos.push_back(ipos);
552	}	563	}
553	vpos.push_back(ipos);	564	vpos.push_back(ipos);
554	}	565	}
	...		...
584	// to do this after having prepared the document, but it needs to be in	595	// to do this after having prepared the document, but it needs to be in
585	// the single-threaded section.	596	// the single-threaded section.
586	if (m_rcldb->m_maxFsOccupPc > 0 &&	597	if (m_rcldb->m_maxFsOccupPc > 0 &&
587	(m_rcldb->m_occFirstCheck \|\|	598	(m_rcldb->m_occFirstCheck \|\|
588	(m_rcldb->m_curtxtsz - m_rcldb->m_occtxtsz) / MB >= 1)) {	599	(m_rcldb->m_curtxtsz - m_rcldb->m_occtxtsz) / MB >= 1)) {
589	LOGDEB("Db::add: checking file system usage\n" );	600	LOGDEB("Db::add: checking file system usage\n");
590	int pc;	601	int pc;
591	m_rcldb->m_occFirstCheck = 0;	602	m_rcldb->m_occFirstCheck = 0;
592	if (fsocc(m_rcldb->m_basedir, &pc) && pc >= m_rcldb->m_maxFsOccupPc) {	603	if (fsocc(m_rcldb->m_basedir, &pc) && pc >= m_rcldb->m_maxFsOccupPc) {
593	LOGERR("Db::add: stop indexing: file system " << pc << " %" <<	604	LOGERR("Db::add: stop indexing: file system " << pc << " %" <<
594	" full > max " << m_rcldb->m_maxFsOccupPc << " %" << "\n");	605	" full > max " << m_rcldb->m_maxFsOccupPc << " %" << "\n");
595	return false;	606	return false;
596	}	607	}
597	m_rcldb->m_occtxtsz = m_rcldb->m_curtxtsz;	608	m_rcldb->m_occtxtsz = m_rcldb->m_curtxtsz;
598	}	609	}
599		610
	...		...
607	if (did < m_rcldb->updated.size()) {	618	if (did < m_rcldb->updated.size()) {
608	// This is necessary because only the file-level docs are tested	619	// This is necessary because only the file-level docs are tested
609	// by needUpdate(), so the subdocs existence flags are only set	620	// by needUpdate(), so the subdocs existence flags are only set
610	// here.	621	// here.
611	m_rcldb->updated[did] = true;	622	m_rcldb->updated[did] = true;
612	LOGINFO("Db::add: docid " << (did) << " updated [" << (fnc) << "]\n" );	623	LOGINFO("Db::add: docid " << did << " updated [" << fnc << "]\n");
613	} else {	624	} else {
614	LOGINFO("Db::add: docid " << (did) << " added [" << (fnc) << "]\n" );	625	LOGINFO("Db::add: docid " << did << " added [" << fnc << "]\n");
615	}	626	}
616	} XCATCHERROR(ermsg);	627	} XCATCHERROR(ermsg);
617		628
618	if (!ermsg.empty()) {	629	if (!ermsg.empty()) {
619	LOGERR("Db::add: replace_document failed: " << (ermsg) << "\n" );	630	LOGERR("Db::add: replace_document failed: " << ermsg << "\n");
620	ermsg.erase();	631	ermsg.erase();
621	// FIXME: is this ever actually needed?	632	// FIXME: is this ever actually needed?
622	try {	633	try {
623	xwdb.add_document(*newdocument_ptr);	634	xwdb.add_document(*newdocument_ptr);
624	LOGDEB("Db::add: " << (fnc) << " added (failed re-seek for duplicate)\n" );	635	LOGDEB("Db::add: " << fnc <<
		636	" added (failed re-seek for duplicate)\n");
625	} XCATCHERROR(ermsg);	637	} XCATCHERROR(ermsg);
626	if (!ermsg.empty()) {	638	if (!ermsg.empty()) {
627	LOGERR("Db::add: add_document failed: " << (ermsg) << "\n" );	639	LOGERR("Db::add: add_document failed: " << ermsg << "\n");
628	return false;	640	return false;
629	}	641	}
630	}	642	}
631		643
632	// Test if we're over the flush threshold (limit memory usage):	644	// Test if we're over the flush threshold (limit memory usage):
	...		...
661	string sig;	673	string sig;
662	if (orphansOnly) {	674	if (orphansOnly) {
663	Xapian::Document doc = xwdb.get_document(*docid);	675	Xapian::Document doc = xwdb.get_document(*docid);
664	sig = doc.get_value(VALUE_SIG);	676	sig = doc.get_value(VALUE_SIG);
665	if (sig.empty()) {	677	if (sig.empty()) {
666	LOGINFO("purgeFileWrite: got empty sig\n" );	678	LOGINFO("purgeFileWrite: got empty sig\n");
667	return false;	679	return false;
668	}	680	}
669	} else {	681	} else {
670	LOGDEB("purgeFile: delete docid " << (*docid) << "\n" );	682	LOGDEB("purgeFile: delete docid " << *docid << "\n");
671	xwdb.delete_document(*docid);	683	xwdb.delete_document(*docid);
672	}	684	}
673	vector<Xapian::docid> docids;	685	vector<Xapian::docid> docids;
674	subDocs(udi, 0, docids);	686	subDocs(udi, 0, docids);
675	LOGDEB("purgeFile: subdocs cnt " << (docids.size()) << "\n" );	687	LOGDEB("purgeFile: subdocs cnt " << docids.size() << "\n");
676	for (vector<Xapian::docid>::iterator it = docids.begin();	688	for (vector<Xapian::docid>::iterator it = docids.begin();
677	it != docids.end(); it++) {	689	it != docids.end(); it++) {
678	if (m_rcldb->m_flushMb > 0) {	690	if (m_rcldb->m_flushMb > 0) {
679	Xapian::termcount trms = xwdb.get_doclength(*it);	691	Xapian::termcount trms = xwdb.get_doclength(*it);
680	m_rcldb->maybeflush(trms * 5);	692	m_rcldb->maybeflush(trms * 5);
	...		...
682	string subdocsig;	694	string subdocsig;
683	if (orphansOnly) {	695	if (orphansOnly) {
684	Xapian::Document doc = xwdb.get_document(*it);	696	Xapian::Document doc = xwdb.get_document(*it);
685	subdocsig = doc.get_value(VALUE_SIG);	697	subdocsig = doc.get_value(VALUE_SIG);
686	if (subdocsig.empty()) {	698	if (subdocsig.empty()) {
687	LOGINFO("purgeFileWrite: got empty sig for subdoc??\n" );	699	LOGINFO("purgeFileWrite: got empty sig for subdoc??\n");
688	continue;	700	continue;
689	}	701	}
690	}	702	}
691		703
692	if (!orphansOnly \|\| sig != subdocsig) {	704	if (!orphansOnly \|\| sig != subdocsig) {
693	LOGDEB("Db::purgeFile: delete subdoc " << (*it) << "\n" );	705	LOGDEB("Db::purgeFile: delete subdoc " << *it << "\n");
694	xwdb.delete_document(*it);	706	xwdb.delete_document(*it);
695	}	707	}
696	}	708	}
697	return true;	709	return true;
698	} XCATCHERROR(ermsg);	710	} XCATCHERROR(ermsg);
699	if (!ermsg.empty()) {	711	if (!ermsg.empty()) {
700	LOGERR("Db::purgeFileWrite: " << (ermsg) << "\n" );	712	LOGERR("Db::purgeFileWrite: " << ermsg << "\n");
701	}	713	}
702	return false;	714	return false;
703	}	715	}
704		716
705		717
	...		...
758		770
759	if (m_ndb == 0 \|\| m_config == 0) {	771	if (m_ndb == 0 \|\| m_config == 0) {
760	m_reason = "Null configuration or Xapian Db";	772	m_reason = "Null configuration or Xapian Db";
761	return false;	773	return false;
762	}	774	}
763	LOGDEB("Db::open: m_isopen " << (m_ndb->m_isopen) << " m_iswritable " << (m_ndb->m_iswritable) << " mode " << (mode) << "\n" );	775	LOGDEB("Db::open: m_isopen " << m_ndb->m_isopen << " m_iswritable " <<
		776	m_ndb->m_iswritable << " mode " << mode << "\n");
764		777
765	if (m_ndb->m_isopen) {	778	if (m_ndb->m_isopen) {
766	// We used to return an error here but I see no reason to	779	// We used to return an error here but I see no reason to
767	if (!close())	780	if (!close())
768	return false;	781	return false;
	...		...
796	// subDocs(). This issue has been gone for a long time	809	// subDocs(). This issue has been gone for a long time
797	// (now: Xapian 1.2) and the separate objects seem to	810	// (now: Xapian 1.2) and the separate objects seem to
798	// trigger other Xapian issues, so the query db is now	811	// trigger other Xapian issues, so the query db is now
799	// a clone of the update one.	812	// a clone of the update one.
800	m_ndb->xrdb = m_ndb->xwdb;	813	m_ndb->xrdb = m_ndb->xwdb;
801	LOGDEB("Db::open: lastdocid: " << (m_ndb->xwdb.get_lastdocid()) << "\n" );	814	LOGDEB("Db::open: lastdocid: " << m_ndb->xwdb.get_lastdocid() <<
		815	"\n");
802	LOGDEB2("Db::open: resetting updated\n" );	816	LOGDEB2("Db::open: resetting updated\n");
803	updated.resize(m_ndb->xwdb.get_lastdocid() + 1);	817	updated.resize(m_ndb->xwdb.get_lastdocid() + 1);
804	for (unsigned int i = 0; i < updated.size(); i++)	818	for (unsigned int i = 0; i < updated.size(); i++)
805	updated[i] = false;	819	updated[i] = false;
806	}	820	}
807	break;	821	break;
	...		...
811	m_ndb->xrdb = Xapian::Database(dir);	825	m_ndb->xrdb = Xapian::Database(dir);
812	for (vector<string>::iterator it = m_extraDbs.begin();	826	for (vector<string>::iterator it = m_extraDbs.begin();
813	it != m_extraDbs.end(); it++) {	827	it != m_extraDbs.end(); it++) {
814	if (error)	828	if (error)
815	*error = DbOpenExtraDb;	829	*error = DbOpenExtraDb;
816	LOGDEB("Db::Open: adding query db [" << &(*it) << "]\n" );	830	LOGDEB("Db::Open: adding query db [" << &(*it) << "]\n");
817	// An error here used to be non-fatal (1.13 and older)	831	// An error here used to be non-fatal (1.13 and older)
818	// but I can't see why	832	// but I can't see why
819	m_ndb->xrdb.add_database(Xapian::Database(*it));	833	m_ndb->xrdb.add_database(Xapian::Database(*it));
820	}	834	}
821	break;	835	break;
	...		...
827	// truncated db	841	// truncated db
828	if (mode != DbTrunc && m_ndb->xrdb.get_doccount() > 0) {	842	if (mode != DbTrunc && m_ndb->xrdb.get_doccount() > 0) {
829	string version = m_ndb->xrdb.get_metadata(cstr_RCL_IDX_VERSION_KEY);	843	string version = m_ndb->xrdb.get_metadata(cstr_RCL_IDX_VERSION_KEY);
830	if (version.compare(cstr_RCL_IDX_VERSION)) {	844	if (version.compare(cstr_RCL_IDX_VERSION)) {
831	m_ndb->m_noversionwrite = true;	845	m_ndb->m_noversionwrite = true;
832	LOGERR("Rcl::Db::open: file index [" << (version) << "], software [" << (cstr_RCL_IDX_VERSION) << "]\n" );	846	LOGERR("Rcl::Db::open: file index [" << version <<
		847	"], software [" << cstr_RCL_IDX_VERSION << "]\n");
833	throw Xapian::DatabaseError("Recoll index version mismatch",	848	throw Xapian::DatabaseError("Recoll index version mismatch",
834	"", "");	849	"", "");
835	}	850	}
836	}	851	}
837	m_mode = mode;	852	m_mode = mode;
	...		...
841	*error = DbOpenNoError;	856	*error = DbOpenNoError;
842	return true;	857	return true;
843	} XCATCHERROR(ermsg);	858	} XCATCHERROR(ermsg);
844		859
845	m_reason = ermsg;	860	m_reason = ermsg;
846	LOGERR("Db::open: exception while opening [" << (dir) << "]: " << (ermsg) << "\n" );	861	LOGERR("Db::open: exception while opening [" <<dir<< "]: " << ermsg << "\n");
847	return false;	862	return false;
848	}	863	}
849		864
850	// Note: xapian has no close call, we delete and recreate the db	865	// Note: xapian has no close call, we delete and recreate the db
851	bool Db::close()	866	bool Db::close()
852	{	867	{
853	LOGDEB1("Db::close()\n" );	868	LOGDEB1("Db::close()\n");
854	return i_close(false);	869	return i_close(false);
855	}	870	}
856	bool Db::i_close(bool final)	871	bool Db::i_close(bool final)
857	{	872	{
858	if (m_ndb == 0)	873	if (m_ndb == 0)
859	return false;	874	return false;
860	LOGDEB("Db::i_close(" << (final) << "): m_isopen " << (m_ndb->m_isopen) << " m_iswritable " << (m_ndb->m_iswritable) << "\n" );	875	LOGDEB("Db::i_close(" << final << "): m_isopen " << m_ndb->m_isopen <<
		876	" m_iswritable " << m_ndb->m_iswritable << "\n");
861	if (m_ndb->m_isopen == false && !final)	877	if (m_ndb->m_isopen == false && !final)
862	return true;	878	return true;
863		879
864	string ermsg;	880	string ermsg;
865	try {	881	try {
	...		...
869	waitUpdIdle();	885	waitUpdIdle();
870	#endif	886	#endif
871	if (!m_ndb->m_noversionwrite)	887	if (!m_ndb->m_noversionwrite)
872	m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,	888	m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
873	cstr_RCL_IDX_VERSION);	889	cstr_RCL_IDX_VERSION);
874	LOGDEB("Rcl::Db:close: xapian will close. May take some time\n" );	890	LOGDEB("Rcl::Db:close: xapian will close. May take some time\n");
875	}	891	}
876	deleteZ(m_ndb);	892	deleteZ(m_ndb);
877	if (w)	893	if (w)
878	LOGDEB("Rcl::Db:close() xapian close done.\n" );	894	LOGDEB("Rcl::Db:close() xapian close done.\n");
879	if (final) {	895	if (final) {
880	return true;	896	return true;
881	}	897	}
882	m_ndb = new Native(this);	898	m_ndb = new Native(this);
883	if (m_ndb) {	899	if (m_ndb) {
884	return true;	900	return true;
885	}	901	}
886	LOGERR("Rcl::Db::close(): cant recreate db object\n" );	902	LOGERR("Rcl::Db::close(): cant recreate db object\n");
887	return false;	903	return false;
888	} XCATCHERROR(ermsg);	904	} XCATCHERROR(ermsg);
889	LOGERR("Db:close: exception while deleting db: " << (ermsg) << "\n" );	905	LOGERR("Db:close: exception while deleting db: " << ermsg << "\n");
890	return false;	906	return false;
891	}	907	}
892		908
893	// Reopen the db with a changed list of additional dbs	909	// Reopen the db with a changed list of additional dbs
894	bool Db::adjustdbs()	910	bool Db::adjustdbs()
895	{	911	{
896	if (m_mode != DbRO) {	912	if (m_mode != DbRO) {
897	LOGERR("Db::adjustdbs: mode not RO\n" );	913	LOGERR("Db::adjustdbs: mode not RO\n");
898	return false;	914	return false;
899	}	915	}
900	if (m_ndb && m_ndb->m_isopen) {	916	if (m_ndb && m_ndb->m_isopen) {
901	if (!close())	917	if (!close())
902	return false;	918	return false;
	...		...
914	return -1;	930	return -1;
915		931
916	XAPTRY(res = m_ndb->xrdb.get_doccount(), m_ndb->xrdb, m_reason);	932	XAPTRY(res = m_ndb->xrdb.get_doccount(), m_ndb->xrdb, m_reason);
917		933
918	if (!m_reason.empty()) {	934	if (!m_reason.empty()) {
919	LOGERR("Db::docCnt: got error: " << (m_reason) << "\n" );	935	LOGERR("Db::docCnt: got error: " << m_reason << "\n");
920	return -1;	936	return -1;
921	}	937	}
922	return res;	938	return res;
923	}	939	}
924		940
	...		...
929	return -1;	945	return -1;
930		946
931	string term = _term;	947	string term = _term;
932	if (o_index_stripchars)	948	if (o_index_stripchars)
933	if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {	949	if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
934	LOGINFO("Db::termDocCnt: unac failed for [" << (_term) << "]\n" );	950	LOGINFO("Db::termDocCnt: unac failed for [" << _term << "]\n");
935	return 0;	951	return 0;
936	}	952	}
937		953
938	if (m_stops.isStop(term)) {	954	if (m_stops.isStop(term)) {