Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
...
...
91
91
92
// Synthetic abstract marker (to discriminate from abstract actually
92
// Synthetic abstract marker (to discriminate from abstract actually
93
// found in document)
93
// found in document)
94
static const string cstr_syntAbs("?!#@");
94
static const string cstr_syntAbs("?!#@");
95
95
96
// Only ONE field name inside the index data record differs from the
97
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
98
// omega
99
100
// Static/Default table for field->prefix/weight translation. 
101
// This is logically const after initialization. Can't use a
102
// static object to init this as the static std::string objects may
103
// not be ready.
104
//
105
// This map is searched if a match is not found in the dynamic
106
// "fields" configuration (cf: Db::fieldToTraits()), meaning that the
107
// entries can be overridden in the configuration, but not
108
// suppressed. 
109
110
static map<string, FieldTraits> fldToTraits;
111
static PTMutexInit o_fldToTraits_mutex;
112
96
113
// A bogus fldToTraits key (bogus because not a real field) used to
97
// A bogus fldToTraits key (bogus because not a real field) used to
114
// retrieve the prefix used for specific filename searches (unsplit
98
// retrieve the prefix used for specific filename searches (unsplit
115
// filename, not "filename as 'filename:' field" searches)
99
// filename, not "filename as 'filename:' field" searches)
116
static const string keySysFilenamePrefix("rclUnsplitFN");
100
static const string keySysFilenamePrefix("rclUnsplitFN");
117
// The prefix for regular "filename:" field searches.
118
static const string cstr_fnAsFieldPrefix("XSFN");
119
// The prefix for unsplit filename terms used with specific -f or
120
// "File Name" GUI entries. There is a compile option to use the same prefix 
121
// for both.
122
// #define UNSPLIT_FN_PREFIX_SAME_AS_SPLIT
123
#if defined(UNSPLIT_FN_PREFIX_SAME_AS_SPLIT)
124
static const string cstr_fnUnsplitPrefix(cstr_fnAsFieldPrefix);
125
#else
126
static const string cstr_fnUnsplitPrefix("XSFS");
101
static const string cstr_fnUnsplitPrefix("XSFS");
127
#endif
128
129
// Populate the file-static default fldToTraits table (field name ->
// FieldTraits). The whole function body runs with o_fldToTraits_mutex held,
// and returns immediately if the table already has entries, so the table
// is only filled once.
// NOTE(review): the caller tests fldToTraits.size() without holding the
// lock before calling this; that unlocked read can race with the
// insertions performed here if two threads get here at the same time --
// confirm whether concurrent construction is possible.
static void initFldToTraits() 
130
{
131
    PTMutexLocker locker(o_fldToTraits_mutex);
132
    // As we perform non-locked testing of initialization, check again with
133
    // the lock held
134
    if (fldToTraits.size())
135
  return;
136
137
    // Can't remember why "abstract" is indexed without a prefix
138
    // (result: it's indexed twice actually). Maybe I'll dare change
139
    // this one day
140
    fldToTraits[Doc::keyabs] = FieldTraits();
141
142
    fldToTraits["ext"] = FieldTraits("XE");
143
144
    // Two distinct keys for file names: the regular field key, and the
    // bogus keySysFilenamePrefix key used for unsplit file name searches.
    fldToTraits[Doc::keyfn] = FieldTraits(cstr_fnAsFieldPrefix);
145
    fldToTraits[keySysFilenamePrefix] = FieldTraits(cstr_fnUnsplitPrefix);
146
147
    // These three keys share the "S" prefix
    fldToTraits[cstr_caption] = FieldTraits("S");
148
    fldToTraits[Doc::keytt] = FieldTraits("S");
149
    fldToTraits["subject"] = FieldTraits("S");
150
151
    // These three keys share the "A" prefix
    fldToTraits[Doc::keyau] = FieldTraits("A");
152
    fldToTraits["creator"] = FieldTraits("A");
153
    fldToTraits["from"] = FieldTraits("A");
154
155
    // These four keys share the "K" prefix
    fldToTraits[Doc::keykw] = FieldTraits("K");
156
    fldToTraits["keyword"] = FieldTraits("K");
157
    fldToTraits["tag"] = FieldTraits("K");
158
    fldToTraits["tags"] = FieldTraits("K");
159
160
    // Date-related keys and Doc::keytp, each with its own one-letter prefix
    fldToTraits["xapyear"] = FieldTraits("Y");
161
    fldToTraits["xapyearmon"] = FieldTraits("M");
162
    fldToTraits["xapdate"] = FieldTraits("D");
163
    fldToTraits[Doc::keytp] = FieldTraits("T");
164
}
165
102
166
// Compute the unique term used to link documents to their origin. 
103
// Compute the unique term used to link documents to their origin. 
167
// "Q" + external udi
104
// "Q" + external udi
168
static inline string make_uniterm(const string& udi)
105
static inline string make_uniterm(const string& udi)
169
{
106
{
170
    string uniterm("Q");
107
    string uniterm("Q");
171
    uniterm.append(udi);
108
    uniterm.append(udi);
172
    return uniterm;
109
    return uniterm;
173
}
110
}
111
174
// Compute parent term used to link documents to their parent document (if any)
112
// Compute parent term used to link documents to their parent document (if any)
175
// "F" + parent external udi
113
// "F" + parent external udi
176
static inline string make_parentterm(const string& udi)
114
static inline string make_parentterm(const string& udi)
177
{
115
{
178
    // I prefer to be in possible conflict with omega than with
116
    // I prefer to be in possible conflict with omega than with
...
...
683
    : m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
621
    : m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
684
      m_synthAbsWordCtxLen(4), m_flushMb(-1), 
622
      m_synthAbsWordCtxLen(4), m_flushMb(-1), 
685
      m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
623
      m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
686
      m_maxFsOccupPc(0), m_mode(Db::DbRO)
624
      m_maxFsOccupPc(0), m_mode(Db::DbRO)
687
{
625
{
688
    if (!fldToTraits.size())
689
  initFldToTraits();
690
691
    m_ndb = new Native(this);
626
    m_ndb = new Native(this);
692
    if (m_config) {
627
    if (m_config) {
693
    m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
628
    m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
694
    m_config->getConfParam("idxflushmb", &m_flushMb);
629
    m_config->getConfParam("idxflushmb", &m_flushMb);
695
    }
630
    }
...
...
974
    if (m_ndb == 0)
909
    if (m_ndb == 0)
975
    return false;
910
    return false;
976
    return m_ndb->m_isopen;
911
    return m_ndb->m_isopen;
977
}
912
}
978
913
979
// Try to translate field specification into field prefix.  We have a
914
// Try to translate field specification into field prefix. 
980
// default table used if translations are not in the config for some
981
// reason (old config not updated ?). We use it only if the config
982
// translation fails. Also we add in there fields which should be
983
// indexed with no prefix (ie: abstract)
984
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
915
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
985
{
916
{
986
    if (m_config && m_config->getFieldTraits(fld, ftpp))
917
    if (m_config && m_config->getFieldTraits(fld, ftpp))
987
    return true;
918
    return true;
988
919
989
    // No data in rclconfig? Check default values
990
    map<string, FieldTraits>::const_iterator it = fldToTraits.find(fld);
991
    if (it != fldToTraits.end()) {
992
  *ftpp = &it->second;
993
  return true;
994
    }
995
    *ftpp = 0;
920
    *ftpp = 0;
996
    return false;
921
    return false;
997
}
922
}
998
923
999
924
...
...
1308
1233
1309
    ////// Special terms for other metadata. No positions for these.
1234
    ////// Special terms for other metadata. No positions for these.
1310
    // Mime type
1235
    // Mime type
1311
    newdocument.add_term("T" + doc.mimetype);
1236
    newdocument.add_term("T" + doc.mimetype);
1312
1237
1313
    // Simple file name indexed unsplit for file name searches with a
1238
    // Simple file name indexed unsplit for specific "file name"
1314
    // term prefix. We also add a term for the filename extension if
1239
    // searches. This is not the same as a filename: clause inside the
1315
    // any.
1240
    // query language.
1241
    // We also add a term for the filename extension if any.
1316
    string utf8fn;
1242
    string utf8fn;
1317
    if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
1243
    if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
1318
    string fn;
1244
    string fn;
1319
    if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
1245
    if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
1320
        // We should truncate after extracting the extension, but this is
1246
        // We should truncate after extracting the extension, but this is