|
a/src/common/rclconfig.cpp |
|
b/src/common/rclconfig.cpp |
|
... |
|
... |
53 |
#endif
|
53 |
#endif
|
54 |
#ifndef MAX
|
54 |
#ifndef MAX
|
55 |
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
|
55 |
#define MAX(A,B) (((A)>(B)) ? (A) : (B))
|
56 |
#endif
|
56 |
#endif
|
57 |
|
57 |
|
|
|
58 |
bool ParamStale::needrecompute()
|
|
|
59 |
{
|
|
|
60 |
if (parent->m_keydirgen != savedkeydirgen) {
|
|
|
61 |
savedkeydirgen = parent->m_keydirgen;
|
|
|
62 |
string newvalue;
|
|
|
63 |
if (!conffile)
|
|
|
64 |
return false;
|
|
|
65 |
conffile->get(paramname, newvalue, parent->m_keydir);
|
|
|
66 |
if (newvalue.compare(savedvalue)) {
|
|
|
67 |
savedvalue = newvalue;
|
|
|
68 |
return true;
|
|
|
69 |
}
|
|
|
70 |
}
|
|
|
71 |
return false;
|
|
|
72 |
}
|
|
|
73 |
// Bind this tracker to its owning RclConfig, to the configuration object
// holding the parameter, and to the parameter name.
void ParamStale::init(RclConfig *rconf, ConfNull *cnf, const string& nm)
{
    paramname = nm;
    conffile = cnf;
    parent = rconf;

    // -1 differs from the generation value set by RclConfig::zeroMe() (0),
    // which makes the next needrecompute() call look the value up.
    savedkeydirgen = -1;
}
|
|
|
80 |
|
|
|
81 |
// Put the object in its "nothing loaded" state: not ok, no configuration
// objects, empty stop-suffix cache, and stale-value trackers bound to no
// configuration object.
void RclConfig::zeroMe()
{
    // Status and key directory generation counter
    m_ok = false;
    m_keydirgen = 0;

    // Stop-suffix cache
    m_maxsufflen = 0;
    m_stopsuffixes = 0;

    // Configuration objects
    m_fields = 0;
    mimeview = 0;
    mimeconf = 0;
    mimemap = 0;
    m_conf = 0;

    // Trackers for values derived from the configuration. They get no
    // configuration object here (null pointer).
    m_rmtstate.init(this, 0, "indexedmimetypes");
    m_skpnstate.init(this, 0, "skippedNames");
    m_stpsuffstate.init(this, 0, "recoll_noindex");
}
|
|
|
95 |
|
58 |
RclConfig::RclConfig(const string *argcnf)
|
96 |
RclConfig::RclConfig(const string *argcnf)
|
59 |
{
|
97 |
{
|
60 |
zeroMe();
|
98 |
zeroMe();
|
61 |
// Compute our data dir name, typically /usr/local/share/recoll
|
99 |
// Compute our data dir name, typically /usr/local/share/recoll
|
62 |
const char *cdatadir = getenv("RECOLL_DATADIR");
|
100 |
const char *cdatadir = getenv("RECOLL_DATADIR");
|
|
... |
|
... |
132 |
if (!readFieldsConfig(cnferrloc))
|
170 |
if (!readFieldsConfig(cnferrloc))
|
133 |
return;
|
171 |
return;
|
134 |
|
172 |
|
135 |
m_ok = true;
|
173 |
m_ok = true;
|
136 |
setKeyDir("");
|
174 |
setKeyDir("");
|
|
|
175 |
|
|
|
176 |
m_stpsuffstate.init(this, mimemap, "recoll_noindex");
|
|
|
177 |
m_skpnstate.init(this, m_conf, "skippedNames");
|
|
|
178 |
m_rmtstate.init(this, m_conf, "indexedmimetypes");
|
137 |
return;
|
179 |
return;
|
138 |
}
|
180 |
}
|
139 |
|
181 |
|
140 |
bool RclConfig::updateMainConfig()
|
182 |
bool RclConfig::updateMainConfig()
|
141 |
{
|
183 |
{
|
|
... |
|
... |
143 |
if (m_conf == 0 || !m_conf->ok()) {
|
185 |
if (m_conf == 0 || !m_conf->ok()) {
|
144 |
string where;
|
186 |
string where;
|
145 |
stringsToString(m_cdirs, where);
|
187 |
stringsToString(m_cdirs, where);
|
146 |
m_reason = string("No/bad main configuration file in: ") + where;
|
188 |
m_reason = string("No/bad main configuration file in: ") + where;
|
147 |
m_ok = false;
|
189 |
m_ok = false;
|
|
|
190 |
m_skpnstate.init(this, 0, "skippedNames");
|
|
|
191 |
m_rmtstate.init(this, 0, "indexedmimetypes");
|
148 |
return false;
|
192 |
return false;
|
149 |
}
|
193 |
}
|
150 |
setKeyDir("");
|
194 |
setKeyDir("");
|
151 |
bool nocjk = false;
|
195 |
bool nocjk = false;
|
152 |
if (getConfParam("nocjk", &nocjk) && nocjk == true) {
|
196 |
if (getConfParam("nocjk", &nocjk) && nocjk == true) {
|
|
... |
|
... |
157 |
TextSplit::cjkProcessing(true, (unsigned int)ngramlen);
|
201 |
TextSplit::cjkProcessing(true, (unsigned int)ngramlen);
|
158 |
} else {
|
202 |
} else {
|
159 |
TextSplit::cjkProcessing(true);
|
203 |
TextSplit::cjkProcessing(true);
|
160 |
}
|
204 |
}
|
161 |
}
|
205 |
}
|
|
|
206 |
m_skpnstate.init(this, m_conf, "skippedNames");
|
|
|
207 |
m_rmtstate.init(this, m_conf, "indexedmimetypes");
|
162 |
return true;
|
208 |
return true;
|
163 |
}
|
209 |
}
|
164 |
|
210 |
|
165 |
ConfNull *RclConfig::cloneMainConfig()
|
211 |
ConfNull *RclConfig::cloneMainConfig()
|
166 |
{
|
212 |
{
|
|
... |
|
... |
174 |
|
220 |
|
175 |
// Remember what directory we're under (for further conf->get()s), and
|
221 |
// Remember what directory we're under (for further conf->get()s), and
|
176 |
// prefetch a few common values.
|
222 |
// prefetch a few common values.
|
177 |
void RclConfig::setKeyDir(const string &dir)
|
223 |
void RclConfig::setKeyDir(const string &dir)
|
178 |
{
|
224 |
{
|
|
|
225 |
if (!dir.compare(m_keydir))
|
|
|
226 |
return;
|
|
|
227 |
|
|
|
228 |
m_keydirgen++;
|
179 |
m_keydir = dir;
|
229 |
m_keydir = dir;
|
180 |
if (m_conf == 0)
|
230 |
if (m_conf == 0)
|
181 |
return;
|
231 |
return;
|
182 |
|
232 |
|
183 |
if (!m_conf->get("defaultcharset", defcharset, m_keydir))
|
233 |
if (!m_conf->get("defaultcharset", defcharset, m_keydir))
|
184 |
defcharset.erase();
|
234 |
defcharset.erase();
|
185 |
|
235 |
|
186 |
getConfParam("guesscharset", &guesscharset);
|
236 |
getConfParam("guesscharset", &guesscharset);
|
187 |
|
|
|
188 |
string rmtstr;
|
|
|
189 |
if (m_conf->get("indexedmimetypes", rmtstr, m_keydir)) {
|
|
|
190 |
stringtolower(rmtstr);
|
|
|
191 |
if (rmtstr != m_rmtstr) {
|
|
|
192 |
LOGDEB2(("RclConfig::setKeyDir: rmtstr [%s]\n", rmtstr.c_str()));
|
|
|
193 |
m_rmtstr = rmtstr;
|
|
|
194 |
list<string> l;
|
|
|
195 |
// Yea, no good to go string->list->set. Lazy me.
|
|
|
196 |
stringToStrings(rmtstr, l);
|
|
|
197 |
for (list<string>::iterator it = l.begin(); it !=l.end(); it++) {
|
|
|
198 |
m_restrictMTypes.insert(*it);
|
|
|
199 |
}
|
|
|
200 |
}
|
|
|
201 |
}
|
|
|
202 |
}
|
237 |
}
|
203 |
|
238 |
|
204 |
bool RclConfig::getConfParam(const std::string &name, int *ivp)
|
239 |
bool RclConfig::getConfParam(const std::string &name, int *ivp)
|
205 |
{
|
240 |
{
|
206 |
string value;
|
241 |
string value;
|
|
... |
|
... |
248 |
return tdl;
|
283 |
return tdl;
|
249 |
}
|
284 |
}
|
250 |
|
285 |
|
251 |
// Get charset to be used for transcoding to utf-8 if unspecified by doc
|
286 |
// Get charset to be used for transcoding to utf-8 if unspecified by doc
|
252 |
// For document contents:
|
287 |
// For document contents:
|
253 |
// If defcharset was set (from the config or a previous call), use it.
|
288 |
// If defcharset was set (from the config or a previous call, this
|
|
|
289 |
// is done in setKeydir), use it.
|
254 |
// Else, try to guess it from the locale
|
290 |
// Else, try to guess it from the locale
|
255 |
// Use iso8859-1 as ultimate default
|
291 |
// Use iso8859-1 as ultimate default
|
256 |
// defcharset is reset on setKeyDir()
|
292 |
//
|
257 |
// For filenames, same thing except that we do not use the config file value
|
293 |
// For filenames, same thing except that we do not use the config file value
|
258 |
// (only the locale).
|
294 |
// (only the locale).
|
259 |
const string& RclConfig::getDefCharset(bool filename)
|
295 |
const string& RclConfig::getDefCharset(bool filename)
|
260 |
{
|
296 |
{
|
261 |
static string localecharset; // This supposedly never changes
|
297 |
// This can't change once computed inside a process.
|
|
|
298 |
static string localecharset;
|
262 |
if (localecharset.empty()) {
|
299 |
if (localecharset.empty()) {
|
263 |
const char *cp;
|
300 |
const char *cp;
|
264 |
cp = nl_langinfo(CODESET);
|
301 |
cp = nl_langinfo(CODESET);
|
265 |
// We don't keep US-ASCII. It's better to use a superset
|
302 |
// We don't keep US-ASCII. It's better to use a superset
|
266 |
// Ie: me have a C locale and some french file names, and I
|
303 |
// Ie: me have a C locale and some french file names, and I
|
|
... |
|
... |
299 |
std::list<string> RclConfig::getAllMimeTypes()
|
336 |
std::list<string> RclConfig::getAllMimeTypes()
|
300 |
{
|
337 |
{
|
301 |
std::list<string> lst;
|
338 |
std::list<string> lst;
|
302 |
if (mimeconf == 0)
|
339 |
if (mimeconf == 0)
|
303 |
return lst;
|
340 |
return lst;
|
304 |
// mimeconf->sortwalk(mtypesWalker, &lst);
|
|
|
305 |
lst = mimeconf->getNames("index");
|
341 |
lst = mimeconf->getNames("index");
|
306 |
lst.sort();
|
342 |
lst.sort();
|
307 |
lst.unique();
|
343 |
lst.unique();
|
308 |
return lst;
|
344 |
return lst;
|
309 |
}
|
345 |
}
|
|
... |
|
... |
347 |
|
383 |
|
348 |
#define STOPSUFFIXES ((SuffixStore *)m_stopsuffixes)
|
384 |
#define STOPSUFFIXES ((SuffixStore *)m_stopsuffixes)
|
349 |
|
385 |
|
350 |
bool RclConfig::inStopSuffixes(const string& fni)
|
386 |
bool RclConfig::inStopSuffixes(const string& fni)
|
351 |
{
|
387 |
{
|
352 |
if (m_stopsuffixes == 0) {
|
388 |
if (m_stopsuffixes == 0 || m_stpsuffstate.needrecompute()) {
|
353 |
// Need to initialize the suffixes
|
389 |
// Need to initialize the suffixes
|
|
|
390 |
delete STOPSUFFIXES;
|
354 |
if ((m_stopsuffixes = new SuffixStore) == 0) {
|
391 |
if ((m_stopsuffixes = new SuffixStore) == 0) {
|
355 |
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
|
392 |
LOGERR(("RclConfig::inStopSuffixes: out of memory\n"));
|
356 |
return false;
|
393 |
return false;
|
357 |
}
|
394 |
}
|
358 |
string stp;
|
|
|
359 |
list<string> stoplist;
|
395 |
list<string> stoplist;
|
360 |
if (mimemap && mimemap->get("recoll_noindex", stp, m_keydir)) {
|
396 |
stringToStrings(m_stpsuffstate.savedvalue, stoplist);
|
361 |
stringToStrings(stp, stoplist);
|
|
|
362 |
}
|
|
|
363 |
for (list<string>::const_iterator it = stoplist.begin();
|
397 |
for (list<string>::const_iterator it = stoplist.begin();
|
364 |
it != stoplist.end(); it++) {
|
398 |
it != stoplist.end(); it++) {
|
365 |
string lower(*it);
|
|
|
366 |
stringtolower(lower);
|
|
|
367 |
STOPSUFFIXES->insert(SfString(lower));
|
399 |
STOPSUFFIXES->insert(SfString(stringtolower(*it)));
|
368 |
if (m_maxsufflen < lower.length())
|
400 |
if (m_maxsufflen < it->length())
|
369 |
m_maxsufflen = lower.length();
|
401 |
m_maxsufflen = it->length();
|
370 |
}
|
402 |
}
|
371 |
}
|
403 |
}
|
372 |
|
404 |
|
373 |
// Only need a tail as long as the longest suffix.
|
405 |
// Only need a tail as long as the longest suffix.
|
374 |
int pos = MAX(0, int(fni.length() - m_maxsufflen));
|
406 |
int pos = MAX(0, int(fni.length() - m_maxsufflen));
|
|
... |
|
... |
442 |
}
|
474 |
}
|
443 |
|
475 |
|
444 |
// Return the handler definition stored for a MIME type in the "index"
// section of mimeconf.
//
// @param mtype the MIME type to look up.
// @param filtertypes if true, apply the "indexedmimetypes" restriction:
//        when that list is not empty, types not in it get an empty result.
// @return the handler definition, or an empty string if the type is
//         filtered out, has no handler, or mimeconf is not loaded.
string RclConfig::getMimeHandlerDef(const std::string &mtype, bool filtertypes)
{
    string hs;

    if (filtertypes) {
        // Rebuild the restriction set if the parameter value changed.
        if (m_rmtstate.needrecompute()) {
            m_restrictMTypes.clear();
            // The const reference selects the stringtolower() variant
            // which returns a lowercased copy, leaving savedvalue intact.
            stringToStrings(
                stringtolower(static_cast<const string&>(m_rmtstate.savedvalue)),
                m_restrictMTypes);
        }
        if (!m_restrictMTypes.empty()) {
            // The set holds lowercased names: compare in lower case.
            string mt = mtype;
            stringtolower(mt);
            if (m_restrictMTypes.find(mt) == m_restrictMTypes.end())
                return hs;
        }
    }

    // mimeconf is 0 after zeroMe(), which the constructor calls before
    // loading anything. Do not dereference it then, just as
    // getAllMimeTypes() does not.
    if (mimeconf == 0)
        return hs;

    if (!mimeconf->get(mtype, hs, "index")) {
        LOGDEB1(("getMimeHandler: no handler for '%s'\n", mtype.c_str()));
    }
    return hs;
}
|
|
|
495 |
|
458 |
// Return the contents of the "missing" file located in the configuration
// directory. The result of file_to_string() is not checked; the string is
// returned as that call left it.
string RclConfig::getMissingHelperDesc()
{
    string desc;
    string missingpath = path_cat(getConfDir(), "missing");
    file_to_string(missingpath, desc);
    return desc;
}
|
|
|
503 |
|
465 |
void RclConfig::storeMissingHelperDesc(const string &s)
|
504 |
void RclConfig::storeMissingHelperDesc(const string &s)
|
466 |
{
|
505 |
{
|
467 |
string fmiss = path_cat(getConfDir(), "missing");
|
506 |
string fmiss = path_cat(getConfDir(), "missing");
|
468 |
FILE *fp = fopen(fmiss.c_str(), "w");
|
507 |
FILE *fp = fopen(fmiss.c_str(), "w");
|
469 |
if (fp) {
|
508 |
if (fp) {
|
|
... |
|
... |
707 |
// Return the path of "stoplist.txt" inside the configuration directory.
string RclConfig::getStopfile()
{
    string stopfile = path_cat(getConfDir(), "stoplist.txt");
    return stopfile;
}
|
711 |
|
750 |
|
712 |
// Return the list of names from the "skippedNames" parameter for the
// current key directory. The parsed list is cached in m_skpnlist and only
// rebuilt when m_skpnstate reports that the value changed.
//
// @return a reference to the cached list, owned by this object.
list<string>& RclConfig::getSkippedNames()
{
    if (m_skpnstate.needrecompute()) {
        // Empty the cache before re-parsing, as getMimeHandlerDef() does
        // for m_restrictMTypes, so that names from a previous value cannot
        // remain whatever stringToStrings() does with its output argument.
        m_skpnlist.clear();
        stringToStrings(m_skpnstate.savedvalue, m_skpnlist);
    }
    return m_skpnlist;
}
|
721 |
|
758 |
|
722 |
list<string> RclConfig::getSkippedPaths()
|
759 |
list<string> RclConfig::getSkippedPaths()
|
723 |
{
|
760 |
{
|
724 |
list<string> skpl;
|
761 |
list<string> skpl;
|
725 |
string skipped;
|
762 |
string skipped;
|
726 |
if (getConfParam("skippedPaths", skipped)) {
|
763 |
if (getConfParam("skippedPaths", skipped)) {
|
727 |
stringToStrings(skipped, skpl);
|
764 |
stringToStrings(skipped, skpl);
|
728 |
}
|
765 |
}
|
729 |
// Always add the dbdir and confdir to the skipped paths
|
766 |
// Always add the dbdir and confdir to the skipped paths. This is
|
|
|
767 |
// especially important for the rt monitor which will go into a loop if we
|
|
|
768 |
// don't do this.
|
730 |
skpl.push_back(getDbDir());
|
769 |
skpl.push_back(getDbDir());
|
731 |
skpl.push_back(getConfDir());
|
770 |
skpl.push_back(getConfDir());
|
732 |
for (list<string>::iterator it = skpl.begin(); it != skpl.end(); it++) {
|
771 |
for (list<string>::iterator it = skpl.begin(); it != skpl.end(); it++) {
|
733 |
*it = path_tildexpand(*it);
|
772 |
*it = path_tildexpand(*it);
|
734 |
*it = path_canon(*it);
|
773 |
*it = path_canon(*it);
|
|
... |
|
... |
914 |
if (r.m_stopsuffixes)
|
953 |
if (r.m_stopsuffixes)
|
915 |
m_stopsuffixes = new SuffixStore(*((SuffixStore*)r.m_stopsuffixes));
|
954 |
m_stopsuffixes = new SuffixStore(*((SuffixStore*)r.m_stopsuffixes));
|
916 |
m_maxsufflen = r.m_maxsufflen;
|
955 |
m_maxsufflen = r.m_maxsufflen;
|
917 |
defcharset = r.defcharset;
|
956 |
defcharset = r.defcharset;
|
918 |
guesscharset = r.guesscharset;
|
957 |
guesscharset = r.guesscharset;
|
919 |
m_rmtstr = r.m_rmtstr;
|
958 |
|
920 |
m_restrictMTypes = r.m_restrictMTypes;
|
959 |
m_stpsuffstate.init(this, mimemap, r.m_stpsuffstate.paramname);
|
|
|
960 |
m_skpnstate.init(this, m_conf, r.m_skpnstate.paramname);
|
|
|
961 |
m_rmtstate.init(this, m_conf, r.m_rmtstate.paramname);
|
921 |
}
|
962 |
}
|
922 |
|
963 |
|
923 |
#else // -> Test
|
964 |
#else // -> Test
|
924 |
|
965 |
|
925 |
#include <stdio.h>
|
966 |
#include <stdio.h>
|