|
a/src/query/plaintorich.cpp |
|
b/src/query/plaintorich.cpp |
|
... |
|
... |
40 |
#include "textsplit.h"
|
40 |
#include "textsplit.h"
|
41 |
#include "utf8iter.h"
|
41 |
#include "utf8iter.h"
|
42 |
#include "smallut.h"
|
42 |
#include "smallut.h"
|
43 |
#include "plaintorich.h"
|
43 |
#include "plaintorich.h"
|
44 |
#include "cancelcheck.h"
|
44 |
#include "cancelcheck.h"
|
|
|
45 |
#include "unacpp.h"
|
45 |
|
46 |
|
46 |
const string PlainToRich::snull = "";
|
47 |
const string PlainToRich::snull = "";
|
47 |
|
48 |
|
48 |
// For debug printing
|
49 |
// For debug printing
|
49 |
static string vecStringToString(const vector<string>& t)
|
50 |
static string vecStringToString(const vector<string>& t)
|
|
... |
|
... |
82 |
}
|
83 |
}
|
83 |
|
84 |
|
84 |
// Callback called by the text-to-words breaker for each word
|
85 |
// Callback called by the text-to-words breaker for each word
|
85 |
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
86 |
virtual bool takeword(const std::string& term, int pos, int bts, int bte) {
|
86 |
string dumb;
|
87 |
string dumb;
|
87 |
Rcl::dumb_string(term, dumb);
|
88 |
if (!unacmaybefold(term, dumb, "UTF-8", true)) {
|
|
|
89 |
LOGINFO(("PlainToRich::splitter::takeword: unac failed for [%s]\n", term.c_str()));
|
|
|
90 |
return true;
|
|
|
91 |
}
|
88 |
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
92 |
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
89 |
// pos, bts, bte));
|
93 |
// pos, bts, bte));
|
90 |
|
94 |
|
91 |
// If this word is a search term, remember its byte-offset span.
|
95 |
// If this word is a search term, remember its byte-offset span.
|
92 |
if (m_terms.find(dumb) != m_terms.end()) {
|
96 |
if (m_terms.find(dumb) != m_terms.end()) {
|