Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
...
...
59
#include "internfile.h"
59
#include "internfile.h"
60
#include "utf8fn.h"
60
#include "utf8fn.h"
61
#ifdef RCL_USE_ASPELL
61
#ifdef RCL_USE_ASPELL
62
#include "rclaspell.h"
62
#include "rclaspell.h"
63
#endif
63
#endif
64
#include "zlibut.h"
64
65
65
// Recoll index format version is stored in user metadata. When this changes,
66
// Recoll index format version is stored in user metadata. When this changes,
66
// we can't open the db and will have to reindex.
67
// we can't open the db and will have to reindex.
67
static const string cstr_RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
68
static const string cstr_RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
68
static const string cstr_RCL_IDX_VERSION("1");
69
static const string cstr_RCL_IDX_VERSION("1");
...
...
1456
    LOGDEB2("Db::add: split body: [" << doc.text << "]\n");
1457
    LOGDEB2("Db::add: split body: [" << doc.text << "]\n");
1457
1458
1458
#ifdef TEXTSPLIT_STATS
1459
#ifdef TEXTSPLIT_STATS
1459
    splitter.resetStats();
1460
    splitter.resetStats();
1460
#endif
1461
#endif
1461
    if (!splitter.text_to_words(doc.text))
1462
    if (!splitter.text_to_words(doc.text)) {
1462
        LOGDEB("Db::addOrUpdate: split failed for main text\n");
1463
        LOGDEB("Db::addOrUpdate: split failed for main text\n");
1464
        } else {
1465
#ifdef RAWTEXT_IN_VALUE
1466
            if (o_index_storerawtext) {
1467
                ZLibUtBuf buf;
1468
                deflateToBuf(doc.text.c_str(), doc.text.size(), buf);
1469
                string tt;
1470
                tt.assign(buf.getBuf(), buf.getCnt());
1471
                newdocument.add_value(VALUE_RAWTEXT, tt);
1472
            }
1473
#endif
1474
        }
1463
1475
1464
#ifdef TEXTSPLIT_STATS
1476
#ifdef TEXTSPLIT_STATS
1465
    // Reject bad data. Unrecognized base64 text is characterized by
1477
    // Reject bad data. Unrecognized base64 text is characterized by
1466
    // high avg word length and high variation (because there are
1478
    // high avg word length and high variation (because there are
1467
    // word-splitters like +/ inside the data).
1479
    // word-splitters like +/ inside the data).
...
...
1668
        MD5HexScan(*md5, digest);
1680
        MD5HexScan(*md5, digest);
1669
        newdocument.add_value(VALUE_MD5, digest);
1681
        newdocument.add_value(VALUE_MD5, digest);
1670
        newdocument.add_boolean_term(wrap_prefix("XM") + *md5);
1682
        newdocument.add_boolean_term(wrap_prefix("XM") + *md5);
1671
    }
1683
    }
1672
1684
1685
#ifdef RAWTEXT_IN_DATA
1686
        if (o_index_storerawtext) {
1687
            RECORD_APPEND(record, string("RAWTEXT"),
1688
                          neutchars(doc.text, cstr_nc));
1689
        }
1690
#endif
1673
    LOGDEB0("Rcl::Db::add: new doc record:\n" << record << "\n");
1691
    LOGDEB0("Rcl::Db::add: new doc record:\n" << record << "\n");
1674
    newdocument.set_data(record);
1692
    newdocument.set_data(record);
1675
    }
1693
    }
1676
#ifdef IDX_THREADS
1694
#ifdef IDX_THREADS
1677
    if (m_ndb->m_havewriteq) {
1695
    if (m_ndb->m_havewriteq) {