|
a/src/rcldb/rcldb.h |
|
b/src/rcldb/rcldb.h |
|
... |
|
... |
52 |
// etc. "udi". Our user is responsible for making sure it's not too
|
52 |
// etc. "udi". Our user is responsible for making sure it's not too
|
53 |
// big, because it's stored as a Xapian term (< 150 bytes would be
|
53 |
// big, because it's stored as a Xapian term (< 150 bytes would be
|
54 |
// reasonable)
|
54 |
// reasonable)
|
55 |
|
55 |
|
56 |
class RclConfig;
|
56 |
class RclConfig;
|
|
|
57 |
class Aspell;
|
57 |
|
58 |
|
58 |
namespace Rcl {
|
59 |
namespace Rcl {
|
59 |
|
60 |
|
60 |
// Omega compatible values. We leave a hole for future omega values. Not sure
|
61 |
// Omega compatible values. We leave a hole for future omega values. Not sure
|
61 |
// it makes any sense to keep any level of omega compat given that the index
|
62 |
// it makes any sense to keep any level of omega compat given that the index
|
|
... |
|
... |
198 |
/** Return existing stemming databases */
|
199 |
/** Return existing stemming databases */
|
199 |
vector<string> getStemLangs();
|
200 |
vector<string> getStemLangs();
|
200 |
|
201 |
|
201 |
/** Test word for spelling correction candidate: not too long, no
|
202 |
/** Test word for spelling correction candidate: not too long, no
|
202 |
special chars... */
|
203 |
special chars... */
|
203 |
static bool isSpellingCandidate(const string& term)
|
204 |
// New-side signature: adds the aspell flag (defaults to true).
//  - aspell == true : a term is rejected when the character read through
//    *u8i (presumably the first character of term -- Utf8Iter is defined
//    outside this view) is CJK or Katakana.
//  - aspell == false: a term is rejected unless that character is Katakana.
// Returns true only if the term passes every check below.
static bool isSpellingCandidate(const string& term, bool aspell=true)
|
204 |
{
|
205 |
{
|
205 |
if (term.empty() || term.length() > 50)
|
206 |
// Empty terms and terms with length() > 50 are never candidates.
if (term.empty() || term.length() > 50)
|
206 |
return false;
|
207 |
return false;
|
207 |
if (has_prefix(term))
|
208 |
// Terms for which has_prefix() is true are rejected (has_prefix is
// declared outside this view).
if (has_prefix(term))
|
208 |
return false;
|
209 |
return false;
|
209 |
Utf8Iter u8i(term);
|
210 |
// Iterator over term; it is only dereferenced once, right after
// construction, for the script tests below.
Utf8Iter u8i(term);
|
210 |
if (TextSplit::isCJK(*u8i))
|
211 |
// Script filter: which scripts are refused depends on the aspell flag.
if (aspell) {
|
211 |
return false;
|
212 |
// aspell mode: refuse CJK and Katakana.
if (TextSplit::isCJK(*u8i) || TextSplit::isKATAKANA(*u8i))
|
|
|
213 |
return false;
|
|
|
214 |
} else {
|
|
|
215 |
// non-aspell mode: refuse everything except Katakana.
if (!TextSplit::isKATAKANA(*u8i)) {
|
|
|
216 |
return false;
|
|
|
217 |
}
|
|
|
218 |
}
|
212 |
if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~")
|
219 |
// Reject any term containing a space, an ASCII digit or one of the listed
// ASCII punctuation characters (the apostrophe is not in the list).
if (term.find_first_of(" !\"#$%&()*+,-./0123456789:;<=>?@[\\]^_`{|}~")
|
213 |
!= string::npos)
|
220 |
!= string::npos)
|
214 |
return false;
|
221 |
return false;
|
215 |
return true;
|
222 |
// All checks passed.
return true;
|
216 |
}
|
223 |
}
|
217 |
|
224 |
|
218 |
|
|
|
219 |
#ifdef TESTING_XAPIAN_SPELL
|
|
|
220 |
/** Return spelling suggestion */
|
225 |
/** Return spelling suggestions for word. NOTE(review): presumably they are stored into suggs and the bool result reports success -- confirm against the implementation */
|
221 |
string getSpellingSuggestion(const string& word);
|
226 |
bool getSpellingSuggestions(const string& word,
|
222 |
#endif
|
227 |
std::vector<std::string>& suggs);
|
223 |
|
228 |
|
224 |
/* The next two, only for searchdata, should be somehow hidden */
|
229 |
/* The next two, only for searchdata, should be somehow hidden */
|
225 |
/* Return configured stop words */
|
230 |
/* Return configured stop words: a const reference to the m_stops member */
|
226 |
const StopList& getStopList() const {return m_stops;}
|
231 |
const StopList& getStopList() const {return m_stops;}
|
227 |
/* Field name to prefix translation (ie: author -> 'A') */
|
232 |
/* Field name to prefix translation (ie: author -> 'A') */
|
|
... |
|
... |
488 |
// a file may be expensive and it's unlikely to change with every
|
493 |
// a file may be expensive and it's unlikely to change with every
|
489 |
// query, so it makes sense to cache it, and Rcl::Db is not a bad
|
494 |
// query, so it makes sense to cache it, and Rcl::Db is not a bad
|
490 |
// place for this.
|
495 |
// place for this.
|
491 |
SynGroups m_syngroups;
|
496 |
SynGroups m_syngroups;
|
492 |
|
497 |
|
|
|
498 |
// Aspell object if needed
|
|
|
499 |
Aspell *m_aspell = nullptr;
|
|
|
500 |
|
493 |
/***************
|
501 |
/***************
|
494 |
* Parameters cached out of the configuration files. Logically const
|
502 |
* Parameters cached out of the configuration files. Logically const
|
495 |
* after init */
|
503 |
* after init */
|
496 |
// Stop terms: those don't get indexed.
|
504 |
// Stop terms: those don't get indexed.
|
497 |
StopList m_stops;
|
505 |
StopList m_stops;
|